PD_midbrain_snRNAseq_GSE157783

PD_midbrain_snRNAseq_GSE157783

PAPER
“Single-cell sequencing of the human midbrain reveals glial activation and a neuronal state specific to Parkinson’s disease”
5 iPD and 6 Healthy control midbrain snRNAseq

Resources

#---------- Seurat  
https://satijalab.org/seurat/vignettes.html  
https://ucdavis-bioinformatics-training.github.io/2017_2018-single-cell-RNA-sequencing-Workshop-UCD_UCB_UCSF/day2/scRNA_Workshop-PART1.html  
https://hbctraining.github.io/scRNA-seq/lessons/04_SC_quality_control.html
https://scrnaseq-course.cog.sanger.ac.uk/website/biological-analysis.html#pseudotime-analysis
https://liulab-dfci.github.io/bioinfo-combio/scrna1.html#intro-to-scrna-seq
http://bioconductor.org/books/release/OSCA/integrating-datasets.html#performing-mnn-correction

#---------- Scanpy – Single-Cell Analysis in Python
https://scanpy.readthedocs.io/en/stable/

#---------- Brain single nuclei Papers  
"A single-cell atlas of the human substantia nigra reveals cell-specific pathways associated with neurological disorders" contains celltype markers

SYNAPSE - create project to download sequencing data
https://www.synapse.org/#!Synapse:syn5550382
https://www.nature.com/articles/s41586-019-1195-2
"Single-cell transcriptomic analysis of Alzheimer’s disease" contains celltype markers

https://portal.brain-map.org/atlases-and-data/rnaseq
https://portal.brain-map.org/atlases-and-data/rnaseq/human-m1-10x
https://www.biorxiv.org/content/10.1101/2020.03.31.016972v2
"Evolution of cellular diversity in primary motor cortex of human, marmoset monkey, and mouse"

"Integrative single-cell analysis of transcriptional and epigenetic states in the human adult brain"contains celltype markers

#---------- Multiplexing
https://www.nature.com/articles/s41467-019-10756-2
"Nuclei multiplexing with barcoded antibodies for single-nucleus genomics"

ASAP-SEQ
https://cite-seq.com/asapseq/
https://www.biorxiv.org/content/10.1101/2020.09.08.286914v1

#---------- Multiomic single cell papers
https://www.nature.com/articles/s41467-018-03149-4
"scNMT-seq enables joint profiling of chromatin accessibility DNA methylation and transcription in single cells"

#---------- Sample Deconvolution 
DemuxEM - a computational tool that detects inter-sample multiplets and assigns singlets to their sample of origin
https://kco-cloud.readthedocs.io/en/latest/hashing_cite_seq.html

cellsnp-lite - assigns cells to donors and detects doublets, even without genotyping reference
https://github.com/single-cell-genetics/cellsnp-lite

scSplit - Genotype-free demultiplexing of pooled single-cell RNA-seq, using a hidden state model for identifying genetically distinct samples within a mixed population
https://github.com/jon-xu/scSplit

demuxlet - Genetic multiplexing of barcoded single cell RNA-seq
https://github.com/statgen/demuxlet

#---------- Doublets
DoubletFinder - an R package that predicts doublets in single-cell RNA sequencing data
#object_filtered <- subset(x = object, idents = "doublet", invert = TRUE)
#object_filtered <- subset(x = object, subset = "CD3E" > EXP_VALUE, invert = TRUE)

Packages & Session

setwd("/Volumes/projects_secondary/bras/Lee/ASAP/PD_midbrain_snRNAseq_GSE157783")
knitr::opts_chunk$set(warning=FALSE, message=FALSE, cache=FALSE, error=FALSE, cache.lazy=FALSE)
#knitr::opts_knit$set(root.dir = '/Volumes/projects_secondary/bras/Lee/PPMI/DLB')
#options(scipen=999)
#options(stringsAsFactors = FALSE)

# Chunk Options  
# include = FALSE, runs code, prevents code and results from appearing  
# echo = FALSE, runs code, prevents code from appearing but not the results  
# eval = FALSE, does not run code  
# message = FALSE prevents messages that are generated by code from appearing 
# warning = FALSE prevents warnings that are generated by code from appearing 
# fig.cap = "..." adds a caption to graphical results.
#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("fgsea")
#install.packages('metap')
library(corrplot)
library(cowplot)
library(data.table)
library(DoubletFinder)
library(dplyr)
library(fgsea)
library(ggplot2)
library(ggpubr)
library(gprofiler2)
library(knitr)
library(kableExtra)
library(lubridate)
library(magrittr)
library(metap)
library(multtest)
library(parallel)
library(purrr)
library(RColorBrewer)
library(readr)
library(reshape2)
library(reticulate)
library(rmarkdown)
library(Seurat)
library(stringr)
library(tibble)
library(tidyr)
library(tidyverse)
library(viridis)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] viridis_0.5.1       viridisLite_0.3.0   forcats_0.5.0      
##  [4] tidyverse_1.3.0     tidyr_1.1.2         tibble_3.0.5       
##  [7] stringr_1.4.0       Seurat_3.2.3        rmarkdown_2.6      
## [10] reticulate_1.18     reshape2_1.4.4      readr_1.4.0        
## [13] RColorBrewer_1.1-2  purrr_0.3.4         multtest_2.46.0    
## [16] Biobase_2.50.0      BiocGenerics_0.36.0 metap_1.4          
## [19] magrittr_2.0.1      lubridate_1.7.9.2   kableExtra_1.3.1   
## [22] knitr_1.31          gprofiler2_0.2.0    ggpubr_0.4.0       
## [25] ggplot2_3.3.3       fgsea_1.16.0        dplyr_1.0.3        
## [28] DoubletFinder_2.0.3 data.table_1.13.6   cowplot_1.1.1      
## [31] corrplot_0.84      
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1         backports_1.2.1      fastmatch_1.1-0     
##   [4] sn_1.6-2             plyr_1.8.6           igraph_1.2.6        
##   [7] lazyeval_0.2.2       splines_4.0.3        BiocParallel_1.24.1 
##  [10] listenv_0.8.0        scattermore_0.7      TH.data_1.0-10      
##  [13] digest_0.6.27        htmltools_0.5.1.1    fansi_0.4.2         
##  [16] tensor_1.5           cluster_2.1.0        ROCR_1.0-11         
##  [19] openxlsx_4.2.3       globals_0.14.0       modelr_0.1.8        
##  [22] matrixStats_0.57.0   sandwich_3.0-0       colorspace_2.0-0    
##  [25] rvest_0.3.6          ggrepel_0.9.1        haven_2.3.1         
##  [28] rbibutils_2.0        xfun_0.20            crayon_1.3.4        
##  [31] jsonlite_1.7.2       spatstat.data_1.7-0  spatstat_1.64-1     
##  [34] survival_3.2-7       zoo_1.8-8            glue_1.4.2          
##  [37] polyclip_1.10-0      gtable_0.3.0         webshot_0.5.2       
##  [40] leiden_0.3.7         car_3.0-10           future.apply_1.7.0  
##  [43] abind_1.4-5          scales_1.1.1         mvtnorm_1.1-1       
##  [46] DBI_1.1.1            rstatix_0.7.0        miniUI_0.1.1.1      
##  [49] Rcpp_1.0.6           plotrix_3.8-1        xtable_1.8-4        
##  [52] tmvnsim_1.0-2        rsvd_1.0.3           foreign_0.8-81      
##  [55] stats4_4.0.3         htmlwidgets_1.5.3    httr_1.4.2          
##  [58] TFisher_0.2.0        ellipsis_0.3.1       ica_1.0-2           
##  [61] pkgconfig_2.0.3      dbplyr_2.0.0         uwot_0.1.10         
##  [64] deldir_0.2-9         tidyselect_1.1.0     rlang_0.4.10        
##  [67] later_1.1.0.1        munsell_0.5.0        cellranger_1.1.0    
##  [70] tools_4.0.3          cli_2.2.0            generics_0.1.0      
##  [73] broom_0.7.5          mathjaxr_1.0-1       ggridges_0.5.3      
##  [76] evaluate_0.14        fastmap_1.1.0        goftest_1.2-2       
##  [79] yaml_2.2.1           fs_1.5.0             fitdistrplus_1.1-3  
##  [82] zip_2.1.1            RANN_2.6.1           nlme_3.1-151        
##  [85] pbapply_1.4-3        future_1.21.0        mime_0.9            
##  [88] xml2_1.3.2           compiler_4.0.3       rstudioapi_0.13     
##  [91] plotly_4.9.3         curl_4.3             png_0.1-7           
##  [94] ggsignif_0.6.1       spatstat.utils_2.0-0 reprex_1.0.0        
##  [97] stringi_1.5.3        lattice_0.20-41      Matrix_1.3-2        
## [100] vctrs_0.3.6          mutoss_0.1-12        pillar_1.4.7        
## [103] lifecycle_0.2.0      Rdpack_2.1           lmtest_0.9-38       
## [106] RcppAnnoy_0.0.18     irlba_2.3.3          gbRd_0.4-11         
## [109] httpuv_1.5.5         patchwork_1.1.1      R6_2.5.0            
## [112] promises_1.1.1       KernSmooth_2.23-18   gridExtra_2.3       
## [115] rio_0.5.26           parallelly_1.23.0    codetools_0.2-18    
## [118] MASS_7.3-53          assertthat_0.2.1     withr_2.4.1         
## [121] sctransform_0.3.2    mnormt_2.0.2         multcomp_1.4-15     
## [124] mgcv_1.8-33          hms_1.0.0            rpart_4.1-15        
## [127] grid_4.0.3           carData_3.0-4        Rtsne_0.15          
## [130] numDeriv_2016.8-1.1  shiny_1.6.0

Cellranger

cellranger-4.0.0
genome annotation indexed with gtf containing unspliced genes

#--------- Download SRA data from NCBI

https://www.ncbi.nlm.nih.gov/Traces/study/?acc=PRJNA662780&o=acc_s%3Aa

module load sratoolkit/2.10.8
qsubs -d -a srr_names -n sratoolkit_prefetch -s "prefetch -X 20G SAMPLE"
qsubs -d -a srr_names -n sartoolkit_fastqdump -s "fastq-dump --split-files /home/lee.marshall/bras_primary/software/sratoolkit/ncbi/sra/sra/SAMPLE.sra --outdir fastq"

#--------- Rename fastq samples

while read line ; do mv $(awk '{print $1"_1.fastq",$2"_I1_001.fastq"}' <<<$line) ; done < ../srr_samples
while read line ; do mv $(awk '{print $1"_2.fastq",$2"_R1_001.fastq"}' <<<$line) ; done < ../srr_samples
while read line ; do mv $(awk '{print $1"_3.fastq",$2"_R2_001.fastq"}' <<<$line) ; done < ../srr_samples

#--------- gzip all fastq files

qsubs -a sample_names -n gzip_fastq -s "gzip fastq/SAMPLE"

#---------- premRNA GTF
 
awk 'BEGIN{FS="\t"; OFS="\t"} $3 == "transcript"{ print; $3="exon"; $9 = gensub("(transcript_id\\s\"{0,1})([^;\"]+)(\"{0,1});", "\\1\\2_premrna\\3;", "g", $9); print; next}{print}' \
refdata-gex-GRCh38-2020-A/genes/genes.gtf > refdata-gex-GRCh38-2020-A.premrna.gtf
 
 
#---------- premRNA Cellranger Reference
 
module load cellranger/cellranger-4.0.0
qsubs -n cellranger_mkref -s "cellranger mkref --genome=refdata-gex-GRCh38-2020-A_premrna --fasta=refdata-gex-GRCh38-2020-A/fasta/genome.fa --genes=refdata-gex-GRCh38-2020-A.premrna.gtf"
 
 
#---------- premRNA Cellranger Count
 
module load cellranger/cellranger-4.0.0
qsubs -n cellranger_count -c 5 -w 48 -a sample_names -s "cellranger count --sample=SAMPLE --id=SAMPLE_count --fastqs=./fastq --chemistry=SC3Pv3 --transcriptome=/home/lee.marshall/bras_primary/software/cellranger/refdata-gex-GRCh38-2020-A_premrna"

Seurat QC

Quality control graphs used to identify filtering thresholds

#---------- Seurat_Object
# Get files
dir <- "counts/"
samples <- paste0(c("IPD1", "IPD2", "IPD3", "IPD4", "IPD5", 
                    "C1", "C2", "C3", "C4", "C5", "C6"), "_count")
# Count Matrix
seurat_counts <- lapply(samples, function(i){
  count <- Read10X(data.dir=paste0(dir,i,"/outs/filtered_feature_bc_matrix/"),
                  gene.column=2, # col1 Ensemble, col2 Symbol
                  unique.features=TRUE,
                  strip.suffix=TRUE) 
  seurat_obj <- CreateSeuratObject(counts=count, 
                                   project=i,
                                   min.cells=10,
                                   min.genes=200)
})
# Merge Seurat Objects
seurat_combined <- merge(x = seurat_counts[[1]], 
                               y = seurat_counts[2:length(seurat_counts)],
                               add.cell.ids = unlist(samples), 
                               project="Aging_Mice_GSE129788")
# Alternative Seurat Object
#d10x.data <- sapply(samples, function(i){
#  d10x <- Read10X(file.path(dir,i,"/outs/filtered_feature_bc_matrix/"))
#  colnames(d10x) <- paste(sapply(strsplit(colnames(d10x),split="-"),'[[',1L),i,sep="-")
#  d10x
#})
#d10x.cbind <- do.call("cbind", d10x.data)
#d10x.combined <- CreateSeuratObject(
#  counts=d10x.cbind,
#  project="DLB_snRNAseq", 
#  min.cells = 10,
#  min.genes = 200,
#  names.field = 2,
#  names.delim = "\\-")

#---------- Add Metadata
#View(seurat_combined@meta.data)
# orig.ident: contains the sample identity if known
# nCount_RNA: number of UMIs per cell
# nFeature_RNA: number of genes detected per cell
# Add number of genes per UMI for each cell to metadata
seurat_combined$log10GenesPerUMI <- log10(seurat_combined$nFeature_RNA) / log10(seurat_combined$nCount_RNA)
# Compute percent mito ratio
seurat_combined$mitoRatio <- PercentageFeatureSet(object = seurat_combined, pattern = "^MT-")
seurat_combined$mitoRatio <- seurat_combined@meta.data$mitoRatio / 100
# Create metadata dataframe
metadata <- seurat_combined@meta.data
# Add cell IDs to metadata
metadata$cells <- rownames(metadata)
# Rename columns
metadata <- metadata %>%
        dplyr::rename(seq_folder = orig.ident,
                      nUMI = nCount_RNA,
                      nGene = nFeature_RNA)
# Create sample column
metadata$sample <- NA
metadata$sample[which(str_detect(metadata$cells,"^IPD1"))] <- "IPD1"
metadata$sample[which(str_detect(metadata$cells,"^IPD2"))] <- "IPD2"
metadata$sample[which(str_detect(metadata$cells,"^IPD3"))] <- "IPD3"
metadata$sample[which(str_detect(metadata$cells,"^IPD4"))] <- "IPD4"
metadata$sample[which(str_detect(metadata$cells,"^IPD5"))] <- "IPD5"
metadata$sample[which(str_detect(metadata$cells,"^C1"))] <- "HC1"
metadata$sample[which(str_detect(metadata$cells,"^C2"))] <- "HC2"
metadata$sample[which(str_detect(metadata$cells,"^C3"))] <- "HC3"
metadata$sample[which(str_detect(metadata$cells,"^C4"))] <- "HC4"
metadata$sample[which(str_detect(metadata$cells,"^C5"))] <- "HC5"
metadata$sample[which(str_detect(metadata$cells,"^C6"))] <- "HC6"

# Create group column
metadata$group <- NA
metadata$group[which(str_detect(metadata$cells,"^IPD1"))] <- "IPD"
metadata$group[which(str_detect(metadata$cells,"^IPD2"))] <- "IPD"
metadata$group[which(str_detect(metadata$cells,"^IPD3"))] <- "IPD"
metadata$group[which(str_detect(metadata$cells,"^IPD4"))] <- "IPD"
metadata$group[which(str_detect(metadata$cells,"^IPD5"))] <- "IPD"
metadata$group[which(str_detect(metadata$cells,"^C1"))] <- "HC"
metadata$group[which(str_detect(metadata$cells,"^C2"))] <- "HC"
metadata$group[which(str_detect(metadata$cells,"^C3"))] <- "HC"
metadata$group[which(str_detect(metadata$cells,"^C4"))] <- "HC"
metadata$group[which(str_detect(metadata$cells,"^C5"))] <- "HC"
metadata$group[which(str_detect(metadata$cells,"^C6"))] <- "HC"

# Add metadata back to Seurat object
seurat_combined@meta.data <- metadata

# rm data
rm(seurat_counts, metadata)

# Create .RData object to load at any time
save(seurat_combined, file="seurat_combined.RData")
#---------- Cell_Counts
# Load data
load("seurat_combined.RData")

# Visualize the number of cell counts per sample
seurat_combined@meta.data %>% 
    ggplot(aes(x=sample, fill=sample)) + 
    geom_bar() + 
    ggtitle("Number of Cells") + 
    theme_classic() + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + 
    theme(plot.title = element_text(hjust=0.5, face="bold"))

#---------- UMI_Counts_per_cell (transcripts)
# Visualize the number UMIs/transcripts per cell
seurat_combined@meta.data %>% 
    ggplot(aes(color=sample, x=nUMI, fill= sample)) + 
    geom_density(alpha = 0.2) + 
    geom_vline(xintercept = 1000, linetype="dashed", colour = "red3") + 
    geom_vline(xintercept = 20000, linetype="dashed", colour = "red3") + 
    ggtitle("Number of UMIs/Transcripts per Cell") + 
    ylab("Cell density") + 
    scale_x_log10() + 
    theme_classic() + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + 
    theme(plot.title = element_text(hjust=0.5, face="bold"))

#---------- Genes_per_cell
# Visualize the distribution of genes detected per cell via histogram
p1 <- seurat_combined@meta.data %>% 
    ggplot(aes(color=sample, x=nGene, fill= sample)) + 
    geom_density(alpha = 0.2) + 
    geom_vline(xintercept = 500, linetype="dashed", colour = "red3") + 
    geom_vline(xintercept = 10000, linetype="dashed", colour = "red3") + 
    ggtitle("Number of Genes per Cell") + 
    scale_x_log10() + 
    theme_classic() +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + 
    theme(plot.title = element_text(hjust=0.5, face="bold"))

# Visualize the distribution of genes detected per cell via boxplot
p2 <- seurat_combined@meta.data %>% 
    ggplot(aes(x=sample, y=log10(nGene), fill=sample)) + 
    geom_boxplot() + 
    ggtitle("Number of Genes vs Number of Cells") +   
    theme_classic() +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
    theme(plot.title = element_text(hjust=0.5, face="bold"))

# Plots
cowplot::plot_grid(p1, p2, ncol=2)

#---------- UMI_Counts_vs_Genes
# Visualize the correlation between genes detected and number of UMIs/transcripts and determine whether strong presence of cells with low numbers of genes/UMIs
seurat_combined@meta.data %>% 
    ggplot(aes(x=nUMI, y=nGene, color=mitoRatio)) + 
    geom_point() + 
    geom_vline(xintercept = 1000, linetype="dashed", colour = "red3") + 
    geom_vline(xintercept = 20000, linetype="dashed", colour = "red3") + 
    geom_hline(yintercept = 500, linetype="dashed", colour = "red3") + 
    geom_hline(yintercept = 10000, linetype="dashed", colour = "red3") + 
    ggtitle("Number of UMIs/Transcripts vs Number of Cells") +   
    stat_smooth(method=lm) + 
    scale_x_log10() + 
    scale_y_log10() + 
    theme_classic() + 
    scale_colour_viridis_c() +
    facet_wrap(~sample)

#---------- Mitochondrial_counts_ratio
# Visualize the distribution of mitochondrial gene expression detected per cell
seurat_combined@meta.data %>% 
    ggplot(aes(color=sample, x=mitoRatio, fill=sample)) + 
    geom_density(alpha = 0.2) + 
    geom_vline(xintercept = 0.20, linetype="dashed", colour = "red3") + 
    ggtitle("Mitochondrial Counts Ratio") + 
    ylab("Cell density") + 
    scale_x_log10() + 
    theme_classic()

#---------- Complexity
# Visualize the overall complexity of the gene expression by visualizing the genes detected per UMI
seurat_combined@meta.data %>%
    ggplot(aes(x=log10GenesPerUMI, color = sample, fill=sample)) + 
    geom_density(alpha = 0.2) +
    geom_vline(xintercept = 0.85, linetype="dashed", colour = "red3") + 
    ggtitle("Complexity Genes per UMI") + 
    ylab("Cell density") + 
    theme_classic()

Seurat Filtering

Remove poor quilty cells and doublets
Keep high quality cells without removing biologically relevant cells
* nUMI >= 1000
* nUMI <= 20000
* nGene >= 500
* nGene <= 10000
* mitoRatio < 0.20
* log10GenesPerUMI > 0.85

#---------- Seurat_Filtering
# Filter out low quality reads using selected thresholds - these will change with experiment
seurat_filtered <- subset(x = seurat_combined, 
                         subset= (nUMI >= 1000) & 
                           (nUMI <= 20000) & 
                           (nGene >= 500) & 
                           (nGene <= 10000) &
                           (mitoRatio < 0.20) & 
                           (log10GenesPerUMI > 0.85))

# Create .RData object to load at any time
save(seurat_filtered, file="seurat_filtered.RData")
#---------- Cell_Counts_Pre_and_Post_Filtered 
# Load data
load("seurat_filtered.RData")

# Number of Cells
nCells_pre <- seurat_combined@meta.data$sample %>% 
                      table() %>% 
                      as.data.frame() %>% 
                      dplyr::rename(Sample=".")

nCells_post <- seurat_filtered@meta.data$sample %>% 
                      table() %>% 
                      as.data.frame() %>% 
                      dplyr::rename(Sample=".")

cbind(nCells_pre, nCells_post) %>% kable(caption="Number of Cells", format.args = list(big.mark = ",")) %>% kable_styling(c("striped", "bordered")) %>% add_header_above(c("Pre-Filtered" = 2, "Post-Filtered" = 2))
Number of Cells
Pre-Filtered
Post-Filtered
Sample Freq Sample Freq
HC1 5,561 HC1 4,386
HC2 5,520 HC2 4,644
HC3 2,628 HC3 2,418
HC4 5,266 HC4 4,926
HC5 3,101 HC5 2,799
HC6 7,186 HC6 6,071
IPD1 2,964 IPD1 2,520
IPD2 7,180 IPD2 6,318
IPD3 4,296 IPD3 3,906
IPD4 2,797 IPD4 2,454
IPD5 6,621 IPD5 5,694
# Violin_plots
p1 <- VlnPlot(object = seurat_combined, features = c("nGene", "nUMI", "mitoRatio", "log10GenesPerUMI"), ncol = 4, pt.size=0, group.by="sample") 

p2 <- VlnPlot(object = seurat_filtered, features = c("nGene", "nUMI", "mitoRatio", "log10GenesPerUMI"), ncol = 4, pt.size=0, group.by="sample") 

cowplot::plot_grid(p1, p2, labels=c("Pre-Filtered", "Post-Filtered"), nrow=2)

# Scatter_plots
p1 <- FeatureScatter(object = seurat_combined, feature1 = "nGene", feature2 = "nUMI", pt.size=0.01, group.by="sample")
p2 <- FeatureScatter(object = seurat_combined, feature1 = "nGene", feature2 = "mitoRatio", pt.size=0.01, group.by="sample")
p3 <- FeatureScatter(object = seurat_filtered, feature1 = "nGene", feature2 = "nUMI", pt.size=0.01, group.by="sample")
p4 <- FeatureScatter(object = seurat_filtered, feature1 = "nGene", feature2 = "mitoRatio", pt.size=0.01, group.by="sample")

# Plots
cowplot::plot_grid(p1, p3, p2, p4, labels=c("Pre-Filtered", "Post-Filtered"), nrow=2, ncol=2)

# Remove data
rm(seurat_combined)

Seurat Cell Cycle Scoring

Determine if cell cycle regression is required

#---------- Cell_Cycle_Scoring
# Normalize the counts
seurat_phase <- NormalizeData(seurat_filtered)

# Load cell cycle markers
load(file="seurat_cycle.rda")

# Score cells for cell cycle
seurat_phase <- CellCycleScoring(seurat_phase, 
                                 g2m.features = g2m_genes, 
                                 s.features = s_genes)

# Identify the most variable genes
seurat_phase <- FindVariableFeatures(seurat_phase, 
                     selection.method = "vst",
                     nfeatures = 2000, 
                     verbose = FALSE)

# Scale the counts
seurat_phase <- ScaleData(seurat_phase)

# Perform PCA
seurat_phase <- RunPCA(seurat_phase)

# Create .RData object to load at any time
save(seurat_phase, file="seurat_phase.RData")
# Load data
load("seurat_phase.RData")

# Plot the PCA colored by cell cycle phase
DimPlot(seurat_phase,
        reduction = "pca",
        group.by= "Phase",
        split.by = "Phase")

# you can regress out the s and g2m score
# alternatively you could only regress out the s-g2m score to keep non-cuyling and cycling cell differences
DimPlot(seurat_phase,
        reduction = "pca",
        group.by= "Phase",
        split.by = "sample")

# Remove data
rm(seurat_phase)

Seurat Batch Correction

Standard batch correction is not considered standard anymore
SCTransform batch correction allows for better normalization
SCTransform method models the UMI counts using a regularized negative binomial
model to remove the variation due to sequencing depth (total nUMIs per cell),
while adjusting the variance based on pooling information across genes with
similar abundances (similar to some bulk RNA-seq methods).

#---------- Batch_Correction_SCTransform
# The data is preprocessed by splitting into individual ‘assays’. 
# In this experiment, each individual sample is considered an ‘assay’.
# Ensure the dataset used here is the non-normalized dataset
DefaultAssay(seurat_filtered) <- "RNA"
# Split seurat object by condition to perform cell cycle scoring and SCT on all samples
seurat_split <- SplitObject(seurat_filtered, split.by = "sample")
load(file="seurat_cycle.rda")
for (i in 1:length(seurat_split)) {
    seurat_split[[i]] <- NormalizeData(seurat_split[[i]], 
                                       verbose = TRUE)
    seurat_split[[i]] <- CellCycleScoring(seurat_split[[i]], 
                                          g2m.features=g2m_genes, 
                                          s.features=s_genes)
    seurat_split[[i]] <- SCTransform(seurat_split[[i]], 
                                     vars.to.regress = c("mitoRatio"))
}

# Select the most variable features to use for integration
seurat_features <- SelectIntegrationFeatures(object.list = seurat_split, 
                                            nfeatures = 3000) 
# Prepare the SCT list object for integration
seurat_split <- PrepSCTIntegration(object.list = seurat_split, 
                                   anchor.features = seurat_features)
# Find best buddies - can take a while to run
seurat_anchors <- FindIntegrationAnchors(object.list = seurat_split, 
                                        normalization.method = "SCT", 
                                        anchor.features = seurat_features)
# Integrate across conditions
seurat_integrated <- IntegrateData(anchorset = seurat_anchors, 
                                   normalization.method = "SCT")
# Run PCA
seurat_integrated <- RunPCA(object = seurat_integrated)
ElbowPlot(object = seurat_integrated, 
          ndims = 50)
# Run UMAP
seurat_integrated <- RunUMAP(seurat_integrated, 
                             dims = 1:40,
                                   reduction = "pca")
# Create .RData object to load at any time
save(seurat_integrated, file="seurat_integrated.RData")
# Remove data
rm(seurat_filtered, seurat_anchors, seurat_split)
# Load data
load(file="seurat_integrated.RData")


# Plot PCA
PCAPlot(seurat_integrated,
        group.by = "sample",
        split.by = "group") 

PCAPlot(seurat_integrated,
        group.by = "sample") 

# Explore heatmap of PCs
DimHeatmap(seurat_integrated, 
           dims = 1:9, 
           cells = 500, 
           balanced = TRUE)

# Printing out the most variable genes driving PCs
print(x = seurat_integrated[["pca"]], 
      dims = 1:9, 
      nfeatures = 5)
## PC_ 1 
## Positive:  PLXDC2, DOCK8, ARHGAP15, APBB1IP, LNCAROD 
## Negative:  FAM155A, FGF14, CSMD1, LRRTM4, OPCML 
## PC_ 2 
## Positive:  ST18, PLP1, CTNNA3, PEX5L, RNF220 
## Negative:  FRMD4A, PLXDC2, ARHGAP24, LRMDA, SLC8A1 
## PC_ 3 
## Positive:  HPSE2, LINC01088, TRPM3, GPC5, SPARCL1 
## Negative:  SLC8A1, CNTNAP2, LRRTM4, FRMD4A, CSMD1 
## PC_ 4 
## Positive:  EPAS1, FLT1, COBLL1, CLDN5, ABCB1 
## Negative:  HPSE2, TRPM3, LINC01088, NRG3, AC012405.1 
## PC_ 5 
## Positive:  ROBO2, SYT1, NRG1, LINGO2, TENM2 
## Negative:  VCAN, TNR, MEGF11, PTPRZ1, PDZD2 
## PC_ 6 
## Positive:  ATP10A, ELOVL7, BTNL9, ABCB1, ANO2 
## Negative:  SLC6A12, DCN, NOTCH3, PTH1R, LAMA2 
## PC_ 7 
## Positive:  CFAP54, CFAP299, C8orf34, DNAH11, SPAG17 
## Negative:  TRPM3, GPC5, OBI1-AS1, LSAMP, SLC1A2 
## PC_ 8 
## Positive:  PLCB1, LINC02055, RORB, OTX2-AS1, SYNPR 
## Negative:  DPP10, TENM2, DCLK1, RBFOX1, KCNB2 
## PC_ 9 
## Positive:  GPC5, SLC1A2, CACNB2, CABLES1, NRXN1 
## Negative:  DPP10, CPAMD8, AC012405.1, CD44, DCLK1
# Plot the elbow plot
ElbowPlot(object = seurat_integrated, 
          ndims = 50)

# Plot UMAP                             
DimPlot(seurat_integrated, reduction = "umap", group.by = "sample")

Seurat Clustering_Cells

Seurat uses a graph-based clustering approach, which embeds cells in a graph
structure, using a K-nearest neighbor (KNN) graph (by default), with edges
drawn between cells with similar gene expression patterns.

The resolution is an important argument that sets the “granularity” of the
downstream clustering and will need to be optimized for every individual
experiment. For datasets of 3,000 - 5,000 cells, the resolution set between
0.4-1.4 generally yields good clustering. Increased resolution values lead to a
greater number of clusters, which is often required for larger datasets.

#---------- Clustering_Cells
# Determine the K-nearest neighbor graph
seurat_cluster <- FindNeighbors(object = seurat_integrated, 
                                dims = 1:40)

# Determine the clusters for various resolutions                                
seurat_cluster <- FindClusters(object = seurat_cluster,
                               resolution = c(0.2, 0.4, 0.6, 0.8, 1.0, 1.2))
# Explore resolutions
# seurat_cluster@meta.data %>% View()

# Remove data
rm(seurat_integrated)

# Create .RData object to load at any time
save(seurat_cluster, file="seurat_cluster.RData")
# Load data
load("seurat_cluster.RData")

#---------- Explore resolutions
# Assign identity of clusters
Idents(object = seurat_cluster) <- "integrated_snn_res.1.2"

# Plot UMAP                             
DimPlot(seurat_cluster, reduction = "umap", label = TRUE)
UMAP_0.4

UMAP_0.4

Seurat UMAP_Summary

UMAP resolution used 1.2

Determine whether clusters represent true cell types or cluster due to
biological or technical variation, such as clusters of cells in the S phase of
the cell cycle, clusters of specific batches, or cells with high mitochondrial
content.

# Extract identity and sample information from seurat object to determine the number of cells per cluster per sample
nCells <- FetchData(seurat_cluster, 
                     vars = c("ident", "sample")) %>%
           dplyr::count(ident, sample) %>%
           tidyr::spread(ident, n)

# View table
#View(n_cells)
kable(nCells, caption="Number of Cells", format.args = list(big.mark = ",")) %>% kable_styling(c("striped", "bordered"))
Number of Cells
sample 0 1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 27 28 29 3 30 4 5 6 7 8 9
HC1 449 515 132 87 104 192 158 155 105 125 95 66 388 37 11 53 49 31 49 8 5 9 21 391 3 151 122 187 191 205 292
HC2 540 395 139 102 95 198 117 144 106 148 60 80 367 18 145 41 57 7 56 16 3 10 1 297 8 136 149 242 298 216 453
HC3 370 352 150 50 114 67 65 86 51 103 13 21 200 3 5 10 14 7 10 20 2 4 4 138 6 104 128 42 126 86 67
HC4 895 746 208 148 153 69 128 175 145 89 57 34 405 47 33 34 19 64 10 32 NA 10 2 372 4 215 211 82 176 288 75
HC5 314 291 110 77 93 144 82 115 86 64 26 24 261 28 8 22 15 14 20 14 4 5 7 265 1 121 106 60 129 154 139
HC6 747 647 170 83 218 252 177 198 113 127 103 211 473 30 15 88 112 13 53 19 NA 32 5 511 10 188 186 594 223 275 198
IPD1 237 217 176 110 147 99 63 58 50 54 16 60 167 29 38 21 22 29 9 8 15 7 7 163 10 215 185 82 68 79 79
IPD2 484 374 263 237 151 127 231 93 219 99 535 143 392 294 114 75 47 109 27 28 27 11 18 470 7 291 278 293 328 276 277
IPD3 278 259 200 239 133 115 126 89 100 93 64 66 282 113 76 11 24 9 12 10 60 11 5 253 4 412 266 143 178 158 117
IPD4 233 202 76 62 79 49 122 76 108 66 101 29 198 23 10 43 20 3 4 13 NA 11 1 263 3 99 187 64 93 147 69
IPD5 494 452 277 539 193 141 125 126 92 164 22 118 317 44 85 94 64 17 26 20 48 32 19 305 27 635 315 306 256 160 181
# UMAP of cells in each cluster by sample
DimPlot(seurat_cluster, 
        label = TRUE, 
        split.by = "sample")  + NoLegend()

# plot
FeaturePlot(seurat_cluster, 
            reduction = "umap", 
            features = c("nUMI", "nGene", "log10GenesPerUMI", "mitoRatio"),
            pt.size = 0.4, 
            sort.cell = TRUE,
            min.cutoff = 'q10',
            label = TRUE)

# cell phases
nCells_phase <-FetchData(seurat_cluster, 
                          vars = c("ident", "Phase")) %>%
                dplyr::count(ident, Phase) %>%
                tidyr::spread(ident, n)
# View table
#View(n_cells)

kable(nCells, caption="Number of Cells", format.args = list(big.mark = ",")) %>% kable_styling(c("striped", "bordered"))
Number of Cells
sample 0 1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 27 28 29 3 30 4 5 6 7 8 9
HC1 449 515 132 87 104 192 158 155 105 125 95 66 388 37 11 53 49 31 49 8 5 9 21 391 3 151 122 187 191 205 292
HC2 540 395 139 102 95 198 117 144 106 148 60 80 367 18 145 41 57 7 56 16 3 10 1 297 8 136 149 242 298 216 453
HC3 370 352 150 50 114 67 65 86 51 103 13 21 200 3 5 10 14 7 10 20 2 4 4 138 6 104 128 42 126 86 67
HC4 895 746 208 148 153 69 128 175 145 89 57 34 405 47 33 34 19 64 10 32 NA 10 2 372 4 215 211 82 176 288 75
HC5 314 291 110 77 93 144 82 115 86 64 26 24 261 28 8 22 15 14 20 14 4 5 7 265 1 121 106 60 129 154 139
HC6 747 647 170 83 218 252 177 198 113 127 103 211 473 30 15 88 112 13 53 19 NA 32 5 511 10 188 186 594 223 275 198
IPD1 237 217 176 110 147 99 63 58 50 54 16 60 167 29 38 21 22 29 9 8 15 7 7 163 10 215 185 82 68 79 79
IPD2 484 374 263 237 151 127 231 93 219 99 535 143 392 294 114 75 47 109 27 28 27 11 18 470 7 291 278 293 328 276 277
IPD3 278 259 200 239 133 115 126 89 100 93 64 66 282 113 76 11 24 9 12 10 60 11 5 253 4 412 266 143 178 158 117
IPD4 233 202 76 62 79 49 122 76 108 66 101 29 198 23 10 43 20 3 4 13 NA 11 1 263 3 99 187 64 93 147 69
IPD5 494 452 277 539 193 141 125 126 92 164 22 118 317 44 85 94 64 17 26 20 48 32 19 305 27 635 315 306 256 160 181
# Explore whether clusters segregate by cell cycle phase
DimPlot(seurat_cluster,
        label = TRUE, 
        split.by = "Phase")  + NoLegend()

# plot
FeaturePlot(seurat_cluster, 
            reduction = "umap", 
            features = c("S.Score", "G2M.Score"),
            pt.size = 0.4, 
            sort.cell = TRUE,
            min.cutoff = 'q10',
            label = TRUE)

# Defining the information in the seurat object of interest
# Extracting this data from the seurat object
umap_data <- FetchData(seurat_cluster, 
                     vars = c(paste0("PC_", 1:16), 
                              "ident", "UMAP_1", "UMAP_2"))

# Extract the UMAP coordinates for the first 10 cells
#seurat_cluster@reductions$umap@cell.embeddings[1:10, 1:2]
# Adding cluster label to center of cluster on UMAP
umap_label <- FetchData(seurat_cluster, 
                        vars = c("ident", "UMAP_1", "UMAP_2"))  %>%
               group_by(ident) %>%
               summarise(x=mean(UMAP_1), y=mean(UMAP_2))

# Plotting a UMAP plot for each of the PCs
map(paste0("PC_", 1:16), function(pc){
        ggplot(umap_data, 
               aes(UMAP_1, UMAP_2)) +
                geom_point(aes_string(color=pc), 
                           alpha = 0.7) +
                scale_color_gradient(guide = FALSE, 
                                     low = "grey90", 
                                     high = "blue")  +
                geom_text(data=umap_label, 
                          aes(label=ident, x, y)) +
                ggtitle(pc)
}) %>% 
        plot_grid(plotlist = .)

# Explore heatmap of PCs
DimHeatmap(seurat_cluster, 
           dims = 1:9, 
           cells = 500, 
           balanced = TRUE)

# Printing out the most variable genes driving PCs
print(x = seurat_cluster[["pca"]], 
      dims = 1:9, 
      nfeatures = 5)
## PC_ 1 
## Positive:  PLXDC2, DOCK8, ARHGAP15, APBB1IP, LNCAROD 
## Negative:  FAM155A, FGF14, CSMD1, LRRTM4, OPCML 
## PC_ 2 
## Positive:  ST18, PLP1, CTNNA3, PEX5L, RNF220 
## Negative:  FRMD4A, PLXDC2, ARHGAP24, LRMDA, SLC8A1 
## PC_ 3 
## Positive:  HPSE2, LINC01088, TRPM3, GPC5, SPARCL1 
## Negative:  SLC8A1, CNTNAP2, LRRTM4, FRMD4A, CSMD1 
## PC_ 4 
## Positive:  EPAS1, FLT1, COBLL1, CLDN5, ABCB1 
## Negative:  HPSE2, TRPM3, LINC01088, NRG3, AC012405.1 
## PC_ 5 
## Positive:  ROBO2, SYT1, NRG1, LINGO2, TENM2 
## Negative:  VCAN, TNR, MEGF11, PTPRZ1, PDZD2 
## PC_ 6 
## Positive:  ATP10A, ELOVL7, BTNL9, ABCB1, ANO2 
## Negative:  SLC6A12, DCN, NOTCH3, PTH1R, LAMA2 
## PC_ 7 
## Positive:  CFAP54, CFAP299, C8orf34, DNAH11, SPAG17 
## Negative:  TRPM3, GPC5, OBI1-AS1, LSAMP, SLC1A2 
## PC_ 8 
## Positive:  PLCB1, LINC02055, RORB, OTX2-AS1, SYNPR 
## Negative:  DPP10, TENM2, DCLK1, RBFOX1, KCNB2 
## PC_ 9 
## Positive:  GPC5, SLC1A2, CACNB2, CABLES1, NRXN1 
## Negative:  DPP10, CPAMD8, AC012405.1, CD44, DCLK1

Seurat Marker_Identification

Generate cell type-specific clusters and use known markers to determine the
identities of the clusters.

Seurat’s FeaturePlot() function let’s us easily explore the known markers on top of our UMAP visualizations. Let’s go through and determine the identities of the clusters. To access the expression levels of all genes, rather than just the 3000 most highly variable genes, we can use the normalized count data stored in the RNA assay slot.

#---------- Marker_Identification
# Select the RNA counts slot to be the default assay
DefaultAssay(seurat_cluster) <- "RNA"

# Normalize RNA data for visualization purposes
seurat_markers <- NormalizeData(seurat_cluster, verbose = FALSE)

# Create .RData object to load at any time
save(seurat_markers, file="seurat_markers.RData")
#---------- Marker_Identification
## remove the x-axis text and tick
## plot.margin to adjust the white space between each plot.
## ... pass any arguments to VlnPlot in Seurat
vlnplot_gg <- function(obj, 
                          feature, 
                          pt.size = 0, 
                          plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"),
                          ...){
  p <- VlnPlot(obj, features = feature, pt.size = pt.size, ... ) + 
    xlab("") + 
    ylab(feature) + 
    ggtitle("") + 
    theme(legend.position = "none", 
          axis.text.x = element_blank(), 
          axis.ticks.x = element_blank(), 
          axis.title.y = element_text(size = rel(1), angle = 0), 
          axis.text.y = element_text(size = rel(1)), 
          plot.margin = plot.margin ) 
  return(p)
}

## extract the max value of the y axis
vlnplot_max<- function(p){
  ymax<- max(ggplot_build(p)$layout$panel_scales_y[[1]]$range$range)
  return(ceiling(ymax))
}

## main function
vlnplot_stacked <- function(obj, features,
                          pt.size = 0, 
                          plot.margin = unit(c(-0.75, 0, -0.75, 0), "cm"),
                          ...){
  plot_list<- purrr::map(features, function(x) vlnplot_gg(obj = obj,
                                                          feature = x, ...))
  # Add back x-axis title to bottom plot. patchwork is going to support this?
  plot_list[[length(plot_list)]] <- plot_list[[length(plot_list)]] + 
                                      theme(axis.text.x=element_text(), 
                                            axis.ticks.x = element_line())
  # change the y-axis tick to only max value 
  ymaxs <- purrr::map_dbl(plot_list, vlnplot_max)
  plot_list <- purrr::map2(plot_list, ymaxs, function(x,y) x + 
                             scale_y_continuous(breaks = c(y)) + 
                             expand_limits(y = y))
  p <- patchwork::wrap_plots(plotlist = plot_list, ncol = 1)
  return(p)
}
# Load data
load("seurat_markers.RData")

#---------- UMAP
DimPlot(seurat_markers, reduction="umap", label = TRUE)

# Original 25 dinemensions 0.4 resolution

#---------- Astrocyte_Markers 
# AQP4, GFAP (5,10,12,18)
vlnplot_stacked(obj = seurat_markers, 
               features = c("AQP4", "GFAP"))

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("AQP4", "GFAP"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- CADPS2
# CADPS2, TIAM1 (27)
vlnplot_stacked(obj = seurat_markers, 
               features = c("CADPS2", "TIAM1"))

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("CADPS2", "TIAM1"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- Dopaminergic_Markers 
# TH, SLC6A3 ()
vlnplot_stacked(obj = seurat_markers, 
               features = c("TH", "SLC6A3"))

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("TH", "SLC6A3"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- Endothelial_Markers
# FLT1, DUSP1, RGS5, CLDN5 (6)
vlnplot_stacked(obj = seurat_markers, 
               features = c("FLT1", "DUSP1", "RGS5", "CLDN5"))

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("FLT1", "DUSP1", "RGS5", "CLDN5"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- Ependymal_Markers
# FOXJ1 (21)
vlnplot_stacked(obj = seurat_markers, 
               features = c("FOXJ1"))

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("FOXJ1"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- Excitatory_Markers
# SATB2, SLC17A6 (13,25)
vlnplot_stacked(obj = seurat_markers, 
               features = c("SATB2", "SLC17A6"))

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("SATB2", "SLC17A6"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- GABA_Markers
# GRIK1 (22)
vlnplot_stacked(obj = seurat_markers, 
               features = c("GRIK1"))

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("GRIK1"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- Inhibitory_Markers
# GAD1, GAD2 (9)
vlnplot_stacked(obj = seurat_markers, 
               features = c("GAD1", "GAD2"))

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("GAD1", "GAD2"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- Microglia_Markers
# CSF3R, APBB1IP, CD74 (4,11,20,29,30)
vlnplot_stacked(obj = seurat_markers, 
               features = c("CSF3R", "APBB1IP", "CD74")) 

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("CSF3R", "APBB1IP", "CD74"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- Oligodendrocytes_Markers
# MOG, MOBP (0,1,2,3,8,14,15,16,24,26)
vlnplot_stacked(obj = seurat_markers, 
               features = c("MOG", "MOBP")) 

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("MOG", "MOBP"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- OPC_Markers
# OLIG1, VCAN (7,17)
vlnplot_stacked(obj = seurat_markers, 
               features = c("OLIG1", "VCAN"))

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("OLIG1", "VCAN"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

#---------- Pericyte_Markers
# PDGFRB (19,23,28)
vlnplot_stacked(obj = seurat_markers, 
               features = c("PDGFRB"))

FeaturePlot(seurat_markers, 
            reduction = "umap",
            features = c("PDGFRB"), 
            min.cutoff = "q10", 
            max.cutoff = "q90",
            label = TRUE)

Seurat Cluster_Annotation

#---------- Cluster_Annotation
#manually add annotations derived above
seurat_anno <- RenameIdents(seurat_markers, 
                                `0` = "Oligodendrocytes",
                                `1` = "Oligodendrocytes",
                                `2` = "Oligodendrocytes",
                                `3` = "Oligodendrocytes",
                                `4` = "Microglia",
                                `5` = "Astrocytes",
                                `6` = "Endothelial",
                                `7` = "OPC",
                                `8` = "Oligodendrocytes",
                                `9` = "Inhibitory",
                                `10` = "Astrocytes",
                                `11` = "Microglia",
                                `12` = "Astrocytes",
                                `13` = "Excitatory",
                                `14` = "Oligodendrocytes",
                                `15` = "Oligodendrocytes",
                                `16` = "Oligodendrocytes",
                                `17` = "OPC",
                                `18` = "Astrocytes",
                                `19` = "Pericytes",
                                `20` = "Microglia",
                                `21` = "Ependymal",
                                `22` = "GABA",
                                `23` = "Pericytes",
                                `24` = "Oligodendrocytes",
                                `25` = "Excitatory",
                                `26` = "Oligodendrocytes",
                                `27` = "CADPS2",
                                `28` = "Pericytes",
                                `29` = "Microglia",
                                `30` = "Microglia")

#---------- UMAP
DimPlot(seurat_anno, reduction="umap", label = TRUE)

# save these annotations in meta.data
seurat_anno@meta.data$cluster_annotation <- Idents(seurat_anno)

# Create .RData object to load at any time
save(seurat_anno, file="seurat_anno.RData")
# Load data
load("seurat_anno.RData")

# Remove data
rm(seurat_markers)

# UMAP cell clusters
DimPlot(seurat_anno, reduction="umap", label = TRUE, label.size = 3, repel = TRUE)

# may want to ask if we are truely seperating interneurons from excitatory
DotPlot(seurat_anno,
        features = c("AQP4", "GFAP", # Astrocyte_Markers
                     "CADPS2", "TIAM1", # CADPS2_Markers
                     "FLT1", "DUSP1", # Endothelial_Markers
                     "SATB2", "SLC17A7", # Excitatory_Markers
                     "GRIK1", # GABA_Markers
                     "GAD1", "GAD2", # Inhibitory_Markers
                     "CSF3R", "APBB1IP", # Microglia_Markers
                     "MOG", "MOBP", # Oligodendrocytes_Markers
                     "OLIG1", "VCAN", # OPC_Markers
                     "PDGFRB"), # Pericyte_Markers
        cols = c("blue", "red"), 
        dot.scale = 8) + 
  RotatedAxis()

# If we want to remove a cell cluster
#seurat_subset <- subset(seurat_integrated, idents = "Endothelial_cells", invert = TRUE)

#---------- Cluster_Cell_Counts
# Visualize the number of cell counts per cluster
seurat_anno@meta.data %>% 
    ggplot(aes(x=cluster_annotation, fill=sample)) + 
    geom_bar() + 
    ggtitle("Number of Cells") + 
    theme_classic() + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + 
    theme(plot.title = element_text(hjust=0.5, face="bold")) + 
    scale_fill_brewer(palette="Dark2")

# percentage of cell per cluster per sample
seurat_anno@meta.data %>% 
  dplyr::count(cluster_annotation, sample) %>% 
  dplyr::group_by(sample) %>% 
  dplyr::mutate(freq = n / sum(n)) %>% 
  ggplot(aes(x=cluster_annotation, y=freq, fill=sample)) + 
  geom_bar(position="dodge", stat="identity") + 
    ggtitle("Percentage of Cells") + 
    theme_classic() + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + 
    theme(plot.title = element_text(hjust=0.5, face="bold")) + 
    scale_fill_brewer(palette="Dark2")

Seurat Conserved_Markers

Identification of all markers for each cluster
This type of analysis is typically recommended for when evaluating a single
sample group/condition. With the FindAllMarkers() function we are comparing each
cluster against all other clusters to identify potential marker genes. The cells
in each cluster are treated as replicates, and essentially a differential
expression analysis is performed with some statistical test. Also, by default
this function will return to you genes that exhibit both positive and negative
expression changes. Typically, we add an argument only.pos to opt for keeping only the positive changes.

Identification of conserved markers in all conditions
Since we have samples representing different conditions in our dataset, our best
option is to find conserved markers. This function internally separates out
cells by sample group/condition, and then performs differential gene expression
testing for a single specified cluster against all other clusters (or a second
cluster, if specified). Gene-level p-values are computed for each condition and
then combined across groups using meta-analysis methods from the MetaDE R
package. Before we start our marker identification we will explicitly set our
default assay, we want to use the original counts and not the integrated data.
DefaultAssay(seurat_anno) <- “RNA”

#---------- Conserved_Markers
# Load data
load("seurat_anno.RData")

# RNA stores the original and normalized counts for finding markers
DefaultAssay(seurat_anno) <- "RNA"

# Ensembl & Symbol Gene IDs 
dir <- "counts/"
samples <- list.files(dir)
Gene_Emsembl_Symbol <- read_tsv(paste0(dir,samples[[1]],
                            "/outs/filtered_feature_bc_matrix/features.tsv.gz"),
                     col_names = c("Gene_Emsembl","Gene_Symbol","Assay_type"), 
                     col_types="ccc")[,c(1:2)] %>% 
                       unique()

cell_types <- seurat_anno$cluster_annotation %>% 
  unique() %>% as.character() %>% sort()
  
#---------- Find Conserved markers across sample groups
get_conserved <- function(seurat_rna,ident_name){
  FindConservedMarkers(seurat_rna,
                       ident.1 = ident_name, 
                       grouping.var = "group", 
                       only.pos = TRUE, 
                       min.diff.pct = 0.1, 
                       min.pct = 0.1, 
                       logfc.threshold = 0.25) %>% 
    rownames_to_column(var = "Gene_Symbol") %>% 
    left_join(Gene_Emsembl_Symbol, by="Gene_Symbol") %>% 
    dplyr::mutate(Conserved_Markers = ident_name)
  }

Conserved_Markers <- mclapply(cell_types, function(i) {
  get_conserved(seurat_anno,i) 
}) %>% bind_rows()

# save the results
write_delim(Conserved_Markers, 
            "PD_midbrain_snRNAseq_GSE157783_Conserved_Markers.txt", 
            delim = "\t")

#----- Pathways
get_gprofiler <- function(markers,ident_name){ 
   genes <- markers %>% 
      dplyr::filter(Conserved_Markers == ident_name)  
   gost(query = genes$Gene_Symbol, 
        organism = "hsapiens",
        correction_method = "g_SCS")$result %>% 
      dplyr::filter(p_value <= 0.05) %>% 
      dplyr::mutate(Conserved_Markers = ident_name) %>% 
      dplyr::select(-parents) 
  }

gprofiler_pathways <- mclapply(cell_types, function(i) {
  get_gprofiler(Conserved_Markers,i) 
}) %>% bind_rows()

# save the results
write_delim(as.data.frame(gprofiler_pathways), 
            "PD_midbrain_snRNAseq_GSE157783_Conserved_Markers_gprofiler_pathways.txt", 
            delim = "\t")
# Read Marker files
Conserved_Markers <- read_delim("PD_midbrain_snRNAseq_GSE157783_Conserved_Markers.txt", 
                            "\t", escape_double = FALSE, trim_ws = TRUE)

# Number of Conserved_Markers
Conserved_Markers %>% 
  dplyr::count(Conserved_Markers) %>% 
  kable(caption="Number of Conserved_Markers", 
        format.args = list(big.mark = ",")) %>% 
  kable_styling(c("striped", "bordered"))
Number of Conserved_Markers
Conserved_Markers n
Astrocytes 850
CADPS2 410
Endothelial 1,335
Ependymal 1,235
Excitatory 1,128
GABA 910
Inhibitory 1,037
Microglia 964
Oligodendrocytes 987
OPC 809
Pericytes 1,228
# Extract top 10 markers per cluster
Conserved_Markers %>% 
  group_by(Conserved_Markers) %>% 
  top_n(n = 10, 
        wt = max_pval) %>% 
  kable(caption="Top10 Conserved_Markers", 
        format.args = list(big.mark = ",")) %>% 
  kable_styling(c("striped", "bordered"))
Top10 Conserved_Markers
Gene_Symbol IPD_p_val IPD_avg_logFC IPD_pct.1 IPD_pct.2 IPD_p_val_adj HC_p_val HC_avg_logFC HC_pct.1 HC_pct.2 HC_p_val_adj max_pval minimump_p_val Gene_Emsembl Conserved_Markers
SEPTIN9 0.00e+00 0.4750577 0.328 0.152 0.0000000 0.0000000 0.3227437 0.254 0.151 0.0000000 0.0000000 0.00e+00 ENSG00000184640 Astrocytes
AZIN1-AS1 0.00e+00 0.3445284 0.299 0.198 0.0000000 0.0000000 0.5029874 0.470 0.288 0.0000000 0.0000000 0.00e+00 ENSG00000253320 Astrocytes
DPH6-DT 0.00e+00 0.4956806 0.546 0.393 0.0000000 0.0000000 0.4100117 0.445 0.341 0.0000000 0.0000000 0.00e+00 ENSG00000248079 Astrocytes
NRP2 0.00e+00 0.3184634 0.286 0.183 0.0000000 0.0000000 0.4687946 0.348 0.181 0.0000000 0.0000000 0.00e+00 ENSG00000118257 Astrocytes
FAM27C 0.00e+00 0.4698023 0.361 0.219 0.0000000 0.0000000 0.3636731 0.340 0.237 0.0000000 0.0000000 0.00e+00 ENSG00000231527 Astrocytes
GOLIM4 0.00e+00 0.2800258 0.377 0.272 0.0000000 0.0000000 0.4300831 0.402 0.239 0.0000000 0.0000000 0.00e+00 ENSG00000173905 Astrocytes
AC046134.2 0.00e+00 0.3517161 0.439 0.338 0.0000000 0.0000000 0.4073069 0.540 0.417 0.0000000 0.0000000 0.00e+00 ENSG00000248932 Astrocytes
LTBP3 0.00e+00 0.4081220 0.344 0.225 0.0000000 0.0000000 0.3802234 0.348 0.247 0.0000000 0.0000000 0.00e+00 ENSG00000168056 Astrocytes
WDR6 0.00e+00 0.3676120 0.322 0.206 0.0000000 0.0000000 0.3226289 0.270 0.166 0.0000000 0.0000000 0.00e+00 ENSG00000178252 Astrocytes
DYNC2H1 0.00e+00 0.2612596 0.410 0.297 0.0000000 0.0000000 0.3380476 0.510 0.384 0.0000000 0.0000000 0.00e+00 ENSG00000187240 Astrocytes
FRY 0.00e+00 1.2180883 0.853 0.324 0.0000000 0.3448368 0.3441919 0.429 0.323 1.0000000 0.3448368 0.00e+00 ENSG00000073910 CADPS2
ADAM22 0.00e+00 1.3850225 0.800 0.321 0.0000000 0.2203285 0.2997259 0.500 0.388 1.0000000 0.2203285 0.00e+00 ENSG00000008277 CADPS2
AC025159.1 0.00e+00 1.1013532 0.520 0.220 0.0000000 0.3039371 0.3276836 0.429 0.321 1.0000000 0.3039371 0.00e+00 ENSG00000257815 CADPS2
GSE1 0.00e+00 0.6835713 0.527 0.235 0.0000000 0.2207884 0.5067989 0.286 0.185 1.0000000 0.2207884 0.00e+00 ENSG00000131149 CADPS2
NR2F1-AS1 0.00e+00 0.5435472 0.453 0.197 0.0000000 0.2277530 0.4900916 0.286 0.182 1.0000000 0.2277530 0.00e+00 ENSG00000237187 CADPS2
PER3 0.00e+00 0.5172644 0.553 0.362 0.0000002 0.7094629 0.3009337 0.286 0.415 1.0000000 0.7094629 0.00e+00 ENSG00000049246 CADPS2
PTEN 0.00e+00 0.4477482 0.660 0.498 0.0000078 0.9527536 0.2537171 0.357 0.458 1.0000000 0.9527536 0.00e+00 ENSG00000171862 CADPS2
ZC3H14 0.00e+00 0.4839486 0.493 0.346 0.0007162 0.2056443 0.4187910 0.429 0.327 1.0000000 0.2056443 1.00e-07 ENSG00000100722 CADPS2
SPOCK2 0.00e+00 0.3636621 0.347 0.184 0.0008550 0.2388565 0.2522941 0.286 0.184 1.0000000 0.2388565 1.00e-07 ENSG00000107742 CADPS2
KLHL7 4.65e-05 0.3792577 0.347 0.231 1.0000000 0.2342242 0.3395831 0.357 0.245 1.0000000 0.2342242 9.31e-05 ENSG00000122550 CADPS2
STXBP6 0.00e+00 0.3372773 0.465 0.363 0.0000000 0.0000000 0.6048888 0.608 0.442 0.0000000 0.0000000 0.00e+00 ENSG00000168952 Endothelial
MAST4 0.00e+00 0.3579624 0.550 0.437 0.0000000 0.0000000 0.4652129 0.565 0.363 0.0000000 0.0000000 0.00e+00 ENSG00000069020 Endothelial
ATL3 0.00e+00 0.2585679 0.307 0.204 0.0000000 0.0000000 0.3370431 0.380 0.203 0.0000000 0.0000000 0.00e+00 ENSG00000184743 Endothelial
FBXW7 0.00e+00 0.3349889 0.486 0.385 0.0000000 0.0000000 0.4441103 0.537 0.371 0.0000000 0.0000000 0.00e+00 ENSG00000109670 Endothelial
VPS13A 0.00e+00 0.2869240 0.385 0.274 0.0000000 0.0000000 0.3218606 0.378 0.240 0.0000000 0.0000000 0.00e+00 ENSG00000197969 Endothelial
MTUS1 0.00e+00 0.2518388 0.782 0.666 0.0000000 0.0000000 0.2896472 0.843 0.708 0.0000000 0.0000000 0.00e+00 ENSG00000129422 Endothelial
PSMB1 0.00e+00 0.2825220 0.372 0.267 0.0000000 0.0000000 0.2780885 0.387 0.258 0.0000000 0.0000000 0.00e+00 ENSG00000008018 Endothelial
PABPC1 0.00e+00 0.2641459 0.491 0.384 0.0000000 0.0000000 0.3224899 0.442 0.321 0.0000000 0.0000000 0.00e+00 ENSG00000070756 Endothelial
PPIB 0.00e+00 0.2656296 0.301 0.200 0.0000000 0.0000000 0.2514439 0.312 0.195 0.0000000 0.0000000 0.00e+00 ENSG00000166794 Endothelial
UQCRB 0.00e+00 0.2861097 0.327 0.225 0.0000000 0.0000000 0.2709786 0.308 0.200 0.0000000 0.0000000 0.00e+00 ENSG00000156467 Endothelial
ENAH 0.00e+00 0.6169393 0.805 0.391 0.0000000 0.0000000 0.2683167 0.654 0.470 0.0005474 0.0000000 0.00e+00 ENSG00000154380 Ependymal
KANSL1L 0.00e+00 0.3733139 0.858 0.587 0.0000000 0.0000000 0.2634195 0.705 0.499 0.0000133 0.0000000 0.00e+00 ENSG00000144445 Ependymal
AC113414.1 0.00e+00 0.3731328 0.443 0.195 0.0000000 0.0000000 0.2700146 0.410 0.236 0.0000515 0.0000000 0.00e+00 ENSG00000254186 Ependymal
MYO9A 0.00e+00 0.3565367 0.854 0.568 0.0000000 0.0000000 0.2643601 0.797 0.614 0.0001034 0.0000000 0.00e+00 ENSG00000066933 Ependymal
ZMAT1 0.00e+00 0.2831260 0.628 0.323 0.0000000 0.0000000 0.2561522 0.645 0.444 0.0000580 0.0000000 0.00e+00 ENSG00000166432 Ependymal
CDH11 0.00e+00 0.3113416 0.579 0.293 0.0000000 0.0000000 0.2841062 0.622 0.411 0.0000082 0.0000000 0.00e+00 ENSG00000140937 Ependymal
ZNF235 0.00e+00 0.2827051 0.458 0.212 0.0000000 0.0000000 0.2788253 0.392 0.220 0.0000089 0.0000000 0.00e+00 ENSG00000159917 Ependymal
MARCH6 0.00e+00 0.2770438 0.830 0.603 0.0000000 0.0000000 0.2677482 0.770 0.591 0.0000012 0.0000000 0.00e+00 ENSG00000145495 Ependymal
TSC22D2 0.00e+00 0.2624078 0.687 0.455 0.0000000 0.0000000 0.3138391 0.594 0.402 0.0001398 0.0000000 0.00e+00 ENSG00000196428 Ependymal
CHD2 0.00e+00 0.2534162 0.836 0.666 0.0000000 0.0000000 0.3078358 0.802 0.624 0.0000095 0.0000000 0.00e+00 ENSG00000173575 Ependymal
MT-ND2 0.00e+00 0.3055538 0.969 0.857 0.0000000 0.0000000 0.5808768 0.976 0.829 0.0000000 0.0000000 0.00e+00 ENSG00000198763 Excitatory
HSP90AB1 1.00e-07 0.3702898 0.823 0.625 0.0035144 0.0000000 0.9761000 0.789 0.534 0.0000000 0.0000001 0.00e+00 ENSG00000096384 Excitatory
CALM1 0.00e+00 0.6541679 0.901 0.638 0.0000000 0.0000000 0.6551169 0.867 0.697 0.0000000 0.0000000 0.00e+00 ENSG00000198668 Excitatory
ATP6V0C 0.00e+00 0.4609087 0.542 0.342 0.0000000 0.0000000 0.6136952 0.529 0.357 0.0000000 0.0000000 0.00e+00 ENSG00000185883 Excitatory
AC013287.1 0.00e+00 0.3626707 0.300 0.173 0.0000000 0.0000000 0.3617282 0.239 0.121 0.0000000 0.0000000 0.00e+00 ENSG00000228566 Excitatory
SERINC1 0.00e+00 0.4032031 0.657 0.400 0.0000000 0.0000000 0.4248225 0.571 0.409 0.0000000 0.0000000 0.00e+00 ENSG00000111897 Excitatory
TERF2IP 0.00e+00 0.3118220 0.658 0.416 0.0000000 0.0000000 0.3518871 0.660 0.470 0.0000000 0.0000000 0.00e+00 ENSG00000166848 Excitatory
MORF4L1 0.00e+00 0.2943523 0.701 0.460 0.0000000 0.0000000 0.3043919 0.575 0.446 0.0000001 0.0000000 0.00e+00 ENSG00000185787 Excitatory
ATP1B1 0.00e+00 0.3565889 0.867 0.538 0.0000000 0.0000000 0.7406684 0.788 0.595 0.0000000 0.0000000 0.00e+00 ENSG00000143153 Excitatory
PPIA 0.00e+00 0.3311437 0.571 0.362 0.0000000 0.0000002 0.3267490 0.481 0.375 0.0040698 0.0000002 0.00e+00 ENSG00000196262 Excitatory
PTPRS 0.00e+00 0.5967663 0.869 0.432 0.0000000 0.0000000 0.3343587 0.661 0.480 0.0000004 0.0000000 0.00e+00 ENSG00000105426 GABA
IDS 0.00e+00 0.5067879 0.734 0.382 0.0000000 0.0000000 0.3158146 0.581 0.404 0.0000008 0.0000000 0.00e+00 ENSG00000010404 GABA
TMEM161B-AS1 0.00e+00 0.4393899 0.910 0.630 0.0000000 0.0000000 0.2592223 0.819 0.636 0.0000035 0.0000000 0.00e+00 ENSG00000247828 GABA
BACH2 0.00e+00 0.3974816 0.824 0.476 0.0000000 0.0000000 0.2761182 0.742 0.540 0.0000019 0.0000000 0.00e+00 ENSG00000112182 GABA
DCC 0.00e+00 0.8467024 0.443 0.209 0.0000000 0.0000000 0.5498832 0.403 0.237 0.0000006 0.0000000 0.00e+00 ENSG00000187323 GABA
DOP1A 0.00e+00 0.3012632 0.730 0.413 0.0000000 0.0000000 0.2796247 0.677 0.468 0.0000002 0.0000000 0.00e+00 ENSG00000083097 GABA
PER3 0.00e+00 0.2977141 0.660 0.360 0.0000000 0.0000000 0.2852912 0.641 0.413 0.0000002 0.0000000 0.00e+00 ENSG00000049246 GABA
RPRD2 0.00e+00 0.2719440 0.803 0.497 0.0000000 0.0000000 0.2533176 0.726 0.517 0.0000006 0.0000000 0.00e+00 ENSG00000163125 GABA
CLSTN1 0.00e+00 0.2573252 0.652 0.410 0.0000000 0.0000000 0.2786382 0.597 0.401 0.0000042 0.0000000 0.00e+00 ENSG00000171603 GABA
LUC7L3 0.00e+00 0.2638336 0.947 0.746 0.0000006 0.0000000 0.3096814 0.887 0.783 0.0000430 0.0000000 0.00e+00 ENSG00000108848 GABA
EHBP1 0.00e+00 0.3112867 0.722 0.533 0.0000000 0.0000000 0.4749452 0.769 0.479 0.0000000 0.0000000 0.00e+00 ENSG00000115504 Inhibitory
FAM222B 0.00e+00 0.2623521 0.614 0.405 0.0000000 0.0000000 0.3677829 0.680 0.418 0.0000000 0.0000000 0.00e+00 ENSG00000173065 Inhibitory
CTIF 0.00e+00 0.2545172 0.675 0.474 0.0000000 0.0000000 0.4173490 0.701 0.473 0.0000000 0.0000000 0.00e+00 ENSG00000134030 Inhibitory
LRP1B 0.00e+00 0.3631062 0.910 0.756 0.0000000 0.0000000 0.4498869 0.929 0.787 0.0000000 0.0000000 0.00e+00 ENSG00000168702 Inhibitory
CAMK2D 0.00e+00 0.3261693 0.746 0.572 0.0000000 0.0000000 0.3474505 0.755 0.541 0.0000000 0.0000000 0.00e+00 ENSG00000145349 Inhibitory
SUPT3H 0.00e+00 0.2657936 0.700 0.539 0.0000000 0.0000000 0.3112079 0.743 0.521 0.0000000 0.0000000 0.00e+00 ENSG00000196284 Inhibitory
TTN-AS1 0.00e+00 0.3661652 0.452 0.310 0.0000000 0.0000000 0.3891230 0.535 0.340 0.0000000 0.0000000 0.00e+00 ENSG00000237298 Inhibitory
ACACA 0.00e+00 0.3533133 0.734 0.576 0.0000000 0.0000000 0.2699299 0.769 0.606 0.0000000 0.0000000 0.00e+00 ENSG00000278540 Inhibitory
LARGE1 0.00e+00 0.3425878 0.669 0.489 0.0000000 0.0000000 0.3388124 0.643 0.459 0.0000000 0.0000000 0.00e+00 ENSG00000133424 Inhibitory
AL589740.1 0.00e+00 0.7779582 0.573 0.398 0.0000000 0.0000000 0.4114561 0.615 0.490 0.0000000 0.0000000 0.00e+00 ENSG00000271860 Inhibitory
LINC00623 0.00e+00 0.5235028 0.402 0.210 0.0000000 0.0000000 0.4282357 0.308 0.199 0.0000000 0.0000000 0.00e+00 ENSG00000226067 Microglia
ATP10D 0.00e+00 0.4567753 0.389 0.227 0.0000000 0.0000000 0.4276893 0.308 0.202 0.0000000 0.0000000 0.00e+00 ENSG00000145246 Microglia
CDC14A 0.00e+00 0.3477190 0.323 0.165 0.0000000 0.0000000 0.3262990 0.244 0.138 0.0000000 0.0000000 0.00e+00 ENSG00000079335 Microglia
ARPC3 0.00e+00 0.4324433 0.368 0.224 0.0000000 0.0000000 0.4413968 0.329 0.224 0.0000000 0.0000000 0.00e+00 ENSG00000111229 Microglia
FLOT1 0.00e+00 0.4552752 0.391 0.255 0.0000000 0.0000000 0.4737700 0.315 0.208 0.0000000 0.0000000 0.00e+00 ENSG00000137312 Microglia
BAZ1A 0.00e+00 0.4152126 0.391 0.253 0.0000000 0.0000000 0.4286763 0.307 0.192 0.0000000 0.0000000 0.00e+00 ENSG00000198604 Microglia
AL008633.1 0.00e+00 0.4766687 0.227 0.114 0.0000000 0.0000000 0.5024777 0.212 0.111 0.0000000 0.0000000 0.00e+00 ENSG00000225689 Microglia
OSBPL11 0.00e+00 0.3141018 0.358 0.228 0.0000000 0.0000000 0.3102440 0.318 0.204 0.0000000 0.0000000 0.00e+00 ENSG00000144909 Microglia
CNTLN 0.00e+00 0.4254490 0.367 0.250 0.0000000 0.0000000 0.4456522 0.341 0.231 0.0000000 0.0000000 0.00e+00 ENSG00000044459 Microglia
SRBD1 0.00e+00 0.4278456 0.321 0.212 0.0000000 0.0000000 0.4663284 0.330 0.228 0.0000000 0.0000000 0.00e+00 ENSG00000068784 Microglia
AC006148.1 0.00e+00 0.5980009 0.615 0.271 0.0000000 0.0000000 0.2562314 0.485 0.326 0.0000000 0.0000000 0.00e+00 ENSG00000242593 Oligodendrocytes
TOX3 0.00e+00 0.5634575 0.398 0.097 0.0000000 0.0000000 0.2585942 0.256 0.121 0.0000000 0.0000000 0.00e+00 ENSG00000103460 Oligodendrocytes
GAS2 0.00e+00 0.4337700 0.378 0.129 0.0000000 0.0000000 0.2592877 0.327 0.188 0.0000000 0.0000000 0.00e+00 ENSG00000148935 Oligodendrocytes
AC007364.1 0.00e+00 0.3332576 0.309 0.161 0.0000000 0.0000000 0.6048222 0.480 0.237 0.0000000 0.0000000 0.00e+00 ENSG00000231969 Oligodendrocytes
UBB 0.00e+00 0.2562117 0.480 0.311 0.0000000 0.0000000 0.3487071 0.518 0.303 0.0000000 0.0000000 0.00e+00 ENSG00000170315 Oligodendrocytes
CALN1 0.00e+00 0.3778386 0.519 0.282 0.0000000 0.0000000 0.2673905 0.510 0.361 0.0000000 0.0000000 0.00e+00 ENSG00000183166 Oligodendrocytes
CYP7B1 0.00e+00 0.2566274 0.312 0.175 0.0000000 0.0000000 0.4132043 0.463 0.275 0.0000000 0.0000000 0.00e+00 ENSG00000172817 Oligodendrocytes
UTY 0.00e+00 0.2716488 0.711 0.557 0.0000000 0.0000000 0.3300194 0.669 0.504 0.0000000 0.0000000 0.00e+00 ENSG00000183878 Oligodendrocytes
LINC01184 0.00e+00 0.3072642 0.529 0.309 0.0000000 0.0000000 0.2637137 0.562 0.410 0.0000000 0.0000000 0.00e+00 ENSG00000245937 Oligodendrocytes
NKAIN1 0.00e+00 0.2928061 0.178 0.050 0.0000000 0.0000000 0.2869113 0.184 0.075 0.0000000 0.0000000 0.00e+00 ENSG00000084628 Oligodendrocytes
EPB41L5 0.00e+00 0.2572713 0.386 0.221 0.0000000 0.0000000 0.3103264 0.434 0.226 0.0000000 0.0000000 0.00e+00 ENSG00000115109 OPC
SFPQ 0.00e+00 0.2531912 0.715 0.570 0.0000000 0.0000000 0.3095741 0.773 0.602 0.0000000 0.0000000 0.00e+00 ENSG00000116560 OPC
DANT2 0.00e+00 0.2920226 0.553 0.409 0.0000000 0.0000000 0.3482132 0.724 0.536 0.0000000 0.0000000 0.00e+00 ENSG00000235244 OPC
PHF14 0.00e+00 0.2640296 0.738 0.570 0.0000000 0.0000000 0.2918490 0.763 0.580 0.0000000 0.0000000 0.00e+00 ENSG00000106443 OPC
AL359232.1 0.00e+00 0.3453247 0.164 0.056 0.0000000 0.0000000 0.2807060 0.241 0.131 0.0000000 0.0000000 0.00e+00 ENSG00000258561 OPC
SCMH1 0.00e+00 0.2618803 0.598 0.387 0.0000000 0.0000000 0.2552713 0.625 0.426 0.0000000 0.0000000 0.00e+00 ENSG00000010803 OPC
AC002460.2 0.00e+00 0.3406536 0.288 0.135 0.0000000 0.0000000 0.3089465 0.286 0.165 0.0000000 0.0000000 0.00e+00 ENSG00000287292 OPC
PTPN4 0.00e+00 0.2592480 0.594 0.411 0.0000000 0.0000000 0.2516403 0.631 0.455 0.0000000 0.0000000 0.00e+00 ENSG00000088179 OPC
PPA2 0.00e+00 0.2770489 0.663 0.478 0.0000000 0.0000000 0.2502462 0.624 0.464 0.0000000 0.0000000 0.00e+00 ENSG00000138777 OPC
SUCLG2-AS1 0.00e+00 0.2922460 0.323 0.169 0.0000000 0.0000000 0.2617787 0.295 0.185 0.0000000 0.0000000 0.00e+00 ENSG00000241316 OPC
RNF144B 0.00e+00 0.2648653 0.232 0.130 0.0000000 0.0000000 0.5048203 0.329 0.084 0.0000000 0.0000000 0.00e+00 ENSG00000137393 Pericytes
CENPP 0.00e+00 0.4318045 0.331 0.230 0.0000000 0.0000000 0.5721510 0.453 0.271 0.0000000 0.0000000 0.00e+00 ENSG00000188312 Pericytes
SEC31A 0.00e+00 0.2831367 0.507 0.396 0.0000000 0.0000000 0.3559391 0.575 0.395 0.0000000 0.0000000 0.00e+00 ENSG00000138674 Pericytes
PLEKHG3 0.00e+00 0.3573501 0.444 0.255 0.0000000 0.0000000 0.2749392 0.472 0.342 0.0000000 0.0000000 0.00e+00 ENSG00000126822 Pericytes
CREB3L2 0.00e+00 0.3078524 0.432 0.306 0.0000000 0.0000000 0.3385584 0.469 0.295 0.0000000 0.0000000 0.00e+00 ENSG00000182158 Pericytes
ANO10 0.00e+00 0.3136331 0.493 0.372 0.0000000 0.0000000 0.3333278 0.510 0.338 0.0000000 0.0000000 0.00e+00 ENSG00000160746 Pericytes
SLC39A10 0.00e+00 0.2882655 0.406 0.295 0.0000000 0.0000000 0.4245030 0.431 0.277 0.0000000 0.0000000 0.00e+00 ENSG00000196950 Pericytes
EVA1C 0.00e+00 0.2801387 0.567 0.425 0.0000000 0.0000000 0.2588896 0.670 0.538 0.0000000 0.0000000 0.00e+00 ENSG00000166979 Pericytes
SMG6 0.00e+00 0.3528398 0.513 0.387 0.0000000 0.0000000 0.2800313 0.528 0.425 0.0000000 0.0000000 0.00e+00 ENSG00000070366 Pericytes
TANK 0.00e+00 0.2961007 0.540 0.418 0.0000000 0.0000000 0.2950809 0.541 0.405 0.0000000 0.0000000 0.00e+00 ENSG00000136560 Pericytes
# Searchable table
Conserved_Markers %>% 
  mutate_if(is.numeric, ~round(.,4)) %>% 
  DT::datatable()
# Read Pathways files
gprofiler_pathways <- read_delim("PD_midbrain_snRNAseq_GSE157783_Conserved_Markers_gprofiler_pathways.txt", 
                            "\t", escape_double = FALSE, trim_ws = TRUE)

cell_types <- seurat_anno$cluster_annotation %>% 
  unique() %>% as.character() %>% sort()

#---------- Conserved_Markers_Gprofiler
gprofiler_top10 <- function(pathway,ident_name){
  pathway %>% 
      dplyr::filter(Conserved_Markers == ident_name) %>% 
      dplyr::select(Conserved_Markers,
             source, 
             term_id, 
             term_name,
             term_size,
             query_size,
             intersection_size,
             recall,
             p_value) %>% 
    top_n(n = -10, wt = p_value) %>% 
    kable(caption="Marker Pathways Top10", 
          format.args = list(big.mark = ",")) %>% 
    kable_styling(c("striped", "bordered"))
  }

gprofiler_top10fig <- function(pathway,ident_name){
   pathway %>% 
      dplyr::filter(Conserved_Markers == ident_name) %>% 
      dplyr::select(Conserved_Markers,
             source, 
             term_id, 
             term_name,
             term_size,
             query_size,
             intersection_size,
             recall,
             p_value) %>% 
    top_n(n = -10, wt = p_value) %>% 
    ggplot(aes(x=recall, 
               y=term_name, 
               colour=p_value, 
               size=intersection_size)) + 
           geom_point() + 
           expand_limits(x=0) +
           labs(x="Hits (%)", 
                y=ident_name, 
                colour="p value", 
                size="Count") +
          theme_bw()
  }
  
gprofiler_fig <- function(markers,ident_name){
   genes <- markers %>% 
      dplyr::filter(Conserved_Markers == ident_name)  
   gost(query = genes$Gene_Symbol, 
        organism = "hsapiens",
        correction_method = "g_SCS") %>% 
      gostplot(capped = FALSE, interactive = FALSE)
  }   
   
mclapply(cell_types, function(i) {
  gprofiler_top10(gprofiler_pathways,i)
  gprofiler_top10fig(gprofiler_pathways,i)
  gprofiler_fig(Conserved_Markers,i)
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

# Searchable table
gprofiler_pathways %>% 
  dplyr::select(Conserved_Markers,
             source, 
             term_id, 
             term_name,
             term_size,
             query_size,
             intersection_size,
             recall,
             p_value) %>% 
  dplyr::mutate_if(is.numeric, ~round(.,4)) %>% 
  DT::datatable()

Seurat Differential_Expression

Find DEG between old and young mice per celltype Using 0.5 logFC

#---------- Differential_Expression
# RNA stores the original and normalized counts for finding markers
DefaultAssay(seurat_anno) <- "RNA"

# add group_cluster info to "identity" for each barcoded UMI count
seurat_anno@meta.data$group_cluster <- paste(seurat_anno@meta.data$group, seurat_anno@meta.data$cluster_annotation, sep = "_")

# make this the Idents()
Idents(seurat_anno) <- seurat_anno$group_cluster

#---------- Find Differential expressed genes between groups
get_deg <- function(seurat_rna,ident1,ident2){
  FindMarkers(seurat_rna, 
              ident.1 = ident1, 
              ident.2 = ident2, 
              test.use = "MAST", 
              latent.vars = NULL, 
              logfc.threshold = 0, 
              min.pct = 0,
              min.diff.pct = -Inf, 
              min.cells.feature = 10, 
              min.cells.group = 10, 
              verbose = FALSE) %>% 
    rownames_to_column(var = "Gene_Symbol") %>% 
    left_join(Gene_Emsembl_Symbol, by="Gene_Symbol") %>% 
    dplyr::mutate(Comparison = paste(ident1, ident2, sep = "_vs_"))
}

#---------- DEG 
cell_types <- seurat_anno$cluster_annotation %>% 
  unique() %>% as.character() %>% sort()

DEG <- mclapply(cell_types, mc.cores = 5, function(i) {
  id1 <- paste0("IPD_",i)
  id2 <- paste0("HC_",i)
  get_deg(seurat_anno,id1,id2) 
}) %>% bind_rows()

# save the results
write_delim(DEG, 
  "PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST.txt", 
  delim = "\t")
# Read Marker files
DEG <- read_delim("PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST.txt", 
                            "\t", escape_double = FALSE, trim_ws = TRUE)

# Number of DEGs
merge(DEG %>% 
        dplyr::count(Comparison) %>%
        dplyr::rename(Total_Genes = n), 
      DEG %>% 
        dplyr::filter(p_val_adj <= 0.05 & avg_logFC >= 0.25 | 
               p_val_adj <= 0.05 & avg_logFC <= -0.25) %>% 
        dplyr::count(Comparison) %>%
        dplyr::rename(Genes_FDR0.05_logFC0.25 = n), 
      by = "Comparison") %>% 
  kable(caption="Number of Differentially Expressed Genes", 
        format.args = list(big.mark = ",")) %>% 
  kable_styling(c("striped", "bordered"))
Number of Differentially Expressed Genes
Comparison Total_Genes Genes_FDR0.05_logFC0.25
IPD_Astrocytes_vs_HC_Astrocytes 24,297 325
IPD_CADPS2_vs_HC_CADPS2 17,729 18
IPD_Endothelial_vs_HC_Endothelial 23,545 397
IPD_Ependymal_vs_HC_Ependymal 23,033 394
IPD_Excitatory_vs_HC_Excitatory 24,987 218
IPD_GABA_vs_HC_GABA 22,065 142
IPD_Inhibitory_vs_HC_Inhibitory 24,925 150
IPD_Microglia_vs_HC_Microglia 24,228 281
IPD_Oligodendrocytes_vs_HC_Oligodendrocytes 23,839 283
IPD_OPC_vs_HC_OPC 24,868 117
IPD_Pericytes_vs_HC_Pericytes 23,088 262
# Extract top 10 DEGs per cluster
DEG %>% 
  group_by(Comparison) %>% 
  top_n(n = -10, 
        wt = p_val_adj) %>% 
  kable(caption="Differentially Expressed Genes", 
        format.args = list(big.mark = ",")) %>% 
  kable_styling(c("striped", "bordered"))
Differentially Expressed Genes
Gene_Symbol p_val avg_logFC pct.1 pct.2 p_val_adj Gene_Emsembl Comparison
NEAT1 0e+00 1.0102347 0.994 0.978 0.0000000 ENSG00000245532 IPD_Astrocytes_vs_HC_Astrocytes
SLC39A11 0e+00 0.6187040 0.934 0.855 0.0000000 ENSG00000133195 IPD_Astrocytes_vs_HC_Astrocytes
RANBP3L 0e+00 0.7907816 0.730 0.506 0.0000000 ENSG00000164188 IPD_Astrocytes_vs_HC_Astrocytes
COL27A1 0e+00 0.7419343 0.671 0.377 0.0000000 ENSG00000196739 IPD_Astrocytes_vs_HC_Astrocytes
MRPS6 0e+00 0.7235911 0.454 0.153 0.0000000 ENSG00000243927 IPD_Astrocytes_vs_HC_Astrocytes
SLC38A2 0e+00 0.6803013 0.452 0.146 0.0000000 ENSG00000134294 IPD_Astrocytes_vs_HC_Astrocytes
PFKP 0e+00 0.6710124 0.698 0.498 0.0000000 ENSG00000067057 IPD_Astrocytes_vs_HC_Astrocytes
CHI3L1 0e+00 1.1374594 0.497 0.243 0.0000000 ENSG00000133048 IPD_Astrocytes_vs_HC_Astrocytes
SMTN 0e+00 0.8327269 0.386 0.125 0.0000000 ENSG00000183963 IPD_Astrocytes_vs_HC_Astrocytes
LINC00299 0e+00 -0.5882425 0.547 0.760 0.0000000 ENSG00000236790 IPD_Astrocytes_vs_HC_Astrocytes
RALYL 0e+00 1.8419993 0.993 0.643 0.0000000 ENSG00000184672 IPD_CADPS2_vs_HC_CADPS2
ZNF385D 0e+00 1.0845952 0.993 0.714 0.0000040 ENSG00000151789 IPD_CADPS2_vs_HC_CADPS2
DLGAP2 0e+00 -1.7565938 0.013 0.643 0.0000167 ENSG00000198010 IPD_CADPS2_vs_HC_CADPS2
SH3RF3 0e+00 -0.9779982 0.000 0.429 0.0008484 ENSG00000172985 IPD_CADPS2_vs_HC_CADPS2
PTCHD4 0e+00 -1.2098472 0.000 0.429 0.0008484 ENSG00000244694 IPD_CADPS2_vs_HC_CADPS2
MALAT1 0e+00 0.5780715 1.000 1.000 0.0008618 ENSG00000251562 IPD_CADPS2_vs_HC_CADPS2
ADARB2 0e+00 -2.5981741 0.073 0.714 0.0013130 ENSG00000185736 IPD_CADPS2_vs_HC_CADPS2
CADPS2 1e-07 0.8430110 0.973 0.429 0.0014255 ENSG00000081803 IPD_CADPS2_vs_HC_CADPS2
MSRA 1e-07 1.1843322 0.980 0.571 0.0018409 ENSG00000175806 IPD_CADPS2_vs_HC_CADPS2
MT-ND4 1e-07 -1.2351560 0.933 1.000 0.0019402 ENSG00000198886 IPD_CADPS2_vs_HC_CADPS2
HSPA1A 0e+00 1.4171708 0.733 0.315 0.0000000 ENSG00000204389 IPD_Endothelial_vs_HC_Endothelial
HSP90AA1 0e+00 0.9771887 0.887 0.784 0.0000000 ENSG00000080824 IPD_Endothelial_vs_HC_Endothelial
HSPH1 0e+00 0.9525667 0.607 0.202 0.0000000 ENSG00000120694 IPD_Endothelial_vs_HC_Endothelial
HSPA1B 0e+00 0.9038222 0.431 0.099 0.0000000 ENSG00000204388 IPD_Endothelial_vs_HC_Endothelial
THSD4 0e+00 -0.8865612 0.811 0.906 0.0000000 ENSG00000187720 IPD_Endothelial_vs_HC_Endothelial
DNAJA1 0e+00 0.6755657 0.619 0.351 0.0000000 ENSG00000086061 IPD_Endothelial_vs_HC_Endothelial
GBP4 0e+00 1.1350516 0.526 0.367 0.0000000 ENSG00000162654 IPD_Endothelial_vs_HC_Endothelial
SLC38A2 0e+00 0.6498719 0.712 0.452 0.0000000 ENSG00000134294 IPD_Endothelial_vs_HC_Endothelial
HSP90AB1 0e+00 0.6070880 0.807 0.718 0.0000000 ENSG00000096384 IPD_Endothelial_vs_HC_Endothelial
CIITA 0e+00 0.6977486 0.231 0.022 0.0000000 ENSG00000179583 IPD_Endothelial_vs_HC_Endothelial
CFAP157 0e+00 -0.6223796 0.926 0.954 0.0000000 ENSG00000160401 IPD_Ependymal_vs_HC_Ependymal
PLCG2 0e+00 1.0578472 0.780 0.369 0.0000000 ENSG00000197943 IPD_Ependymal_vs_HC_Ependymal
LMO2 0e+00 -0.7900509 0.458 0.765 0.0000000 ENSG00000135363 IPD_Ependymal_vs_HC_Ependymal
PER1 0e+00 -0.8306501 0.471 0.756 0.0000000 ENSG00000179094 IPD_Ependymal_vs_HC_Ependymal
SPAG8 0e+00 -0.5624806 0.774 0.853 0.0000000 ENSG00000137098 IPD_Ependymal_vs_HC_Ependymal
FAM184A 0e+00 -0.7290285 0.644 0.885 0.0000000 ENSG00000111879 IPD_Ependymal_vs_HC_Ependymal
TOB1 0e+00 -0.7759352 0.520 0.779 0.0000000 ENSG00000141232 IPD_Ependymal_vs_HC_Ependymal
FZD3 0e+00 0.6237558 0.929 0.682 0.0000000 ENSG00000104290 IPD_Ependymal_vs_HC_Ependymal
PAPLN 0e+00 -0.5012439 0.071 0.419 0.0000000 ENSG00000100767 IPD_Ependymal_vs_HC_Ependymal
CIRBP 0e+00 -0.6191617 0.375 0.728 0.0000000 ENSG00000099622 IPD_Ependymal_vs_HC_Ependymal
MEIS2 0e+00 0.8832146 0.496 0.157 0.0000000 ENSG00000134138 IPD_Excitatory_vs_HC_Excitatory
TFAP2D 0e+00 0.4101668 0.292 0.037 0.0000000 ENSG00000008197 IPD_Excitatory_vs_HC_Excitatory
LHX9 0e+00 0.3182558 0.396 0.113 0.0000000 ENSG00000143355 IPD_Excitatory_vs_HC_Excitatory
SGCZ 0e+00 0.9482440 0.805 0.549 0.0000000 ENSG00000185053 IPD_Excitatory_vs_HC_Excitatory
EBF3 0e+00 0.4825732 0.448 0.162 0.0000000 ENSG00000108001 IPD_Excitatory_vs_HC_Excitatory
MT-CO1 0e+00 -0.5908875 0.975 0.994 0.0000000 ENSG00000198804 IPD_Excitatory_vs_HC_Excitatory
AC119673.2 0e+00 0.4238447 0.404 0.159 0.0000000 ENSG00000286619 IPD_Excitatory_vs_HC_Excitatory
CPNE4 0e+00 -0.8363654 0.417 0.585 0.0000000 ENSG00000196353 IPD_Excitatory_vs_HC_Excitatory
EBF1 0e+00 0.7749330 0.404 0.160 0.0000000 ENSG00000164330 IPD_Excitatory_vs_HC_Excitatory
ANK1 0e+00 0.3258161 0.765 0.485 0.0000000 ENSG00000029534 IPD_Excitatory_vs_HC_Excitatory
PTPRT 0e+00 0.8162530 0.967 0.722 0.0000000 ENSG00000196090 IPD_GABA_vs_HC_GABA
KCNQ1OT1 0e+00 -0.8697001 0.734 0.871 0.0000000 ENSG00000269821 IPD_GABA_vs_HC_GABA
NEGR1 0e+00 0.7371908 0.963 0.847 0.0000000 ENSG00000172260 IPD_GABA_vs_HC_GABA
KCTD8 0e+00 0.8975399 0.910 0.665 0.0000000 ENSG00000183783 IPD_GABA_vs_HC_GABA
RALYL 0e+00 0.4899623 0.984 0.984 0.0000000 ENSG00000184672 IPD_GABA_vs_HC_GABA
AHI1 0e+00 -0.5715830 0.803 0.911 0.0000000 ENSG00000135541 IPD_GABA_vs_HC_GABA
SDK1 0e+00 0.6983150 0.943 0.786 0.0000000 ENSG00000146555 IPD_GABA_vs_HC_GABA
LINC01122 0e+00 0.6490492 0.881 0.629 0.0000000 ENSG00000233723 IPD_GABA_vs_HC_GABA
CCDC146 0e+00 -0.7281565 0.402 0.641 0.0000000 ENSG00000135205 IPD_GABA_vs_HC_GABA
LDB2 0e+00 0.5049193 0.918 0.665 0.0000000 ENSG00000169744 IPD_GABA_vs_HC_GABA
DLX6-AS1 0e+00 -0.6371891 0.001 0.227 0.0000000 ENSG00000231764 IPD_Inhibitory_vs_HC_Inhibitory
TFAP2B 0e+00 0.4487000 0.246 0.058 0.0000000 ENSG00000008196 IPD_Inhibitory_vs_HC_Inhibitory
GATA3 0e+00 0.3046156 0.315 0.100 0.0000000 ENSG00000107485 IPD_Inhibitory_vs_HC_Inhibitory
DLX1 0e+00 -0.1335462 0.000 0.108 0.0000000 ENSG00000144355 IPD_Inhibitory_vs_HC_Inhibitory
FAM27C 0e+00 -0.4504831 0.414 0.613 0.0000000 ENSG00000231527 IPD_Inhibitory_vs_HC_Inhibitory
LINC01210 0e+00 0.3192183 0.235 0.059 0.0000000 ENSG00000239513 IPD_Inhibitory_vs_HC_Inhibitory
MT-CO1 0e+00 -0.4595117 0.954 0.989 0.0000000 ENSG00000198804 IPD_Inhibitory_vs_HC_Inhibitory
MT-ND4 0e+00 -0.4393826 0.974 0.992 0.0000000 ENSG00000198886 IPD_Inhibitory_vs_HC_Inhibitory
AL592183.1 0e+00 0.3374974 0.451 0.229 0.0000000 ENSG00000273748 IPD_Inhibitory_vs_HC_Inhibitory
AC119673.2 0e+00 0.3859995 0.321 0.133 0.0000000 ENSG00000286619 IPD_Inhibitory_vs_HC_Inhibitory
AP001977.1 0e+00 1.2216857 0.384 0.062 0.0000000 ENSG00000286044 IPD_Microglia_vs_HC_Microglia
CHI3L1 0e+00 0.8058965 0.336 0.037 0.0000000 ENSG00000133048 IPD_Microglia_vs_HC_Microglia
LNCAROD 0e+00 0.7205218 0.891 0.702 0.0000000 ENSG00000231131 IPD_Microglia_vs_HC_Microglia
LINC02397 0e+00 0.7770716 0.377 0.064 0.0000000 ENSG00000205056 IPD_Microglia_vs_HC_Microglia
HSPH1 0e+00 0.3275040 0.481 0.213 0.0000000 ENSG00000120694 IPD_Microglia_vs_HC_Microglia
DIRC3 0e+00 0.9870970 0.322 0.128 0.0000000 ENSG00000231672 IPD_Microglia_vs_HC_Microglia
NEAT1 0e+00 0.3315993 0.986 0.980 0.0000000 ENSG00000245532 IPD_Microglia_vs_HC_Microglia
SLC38A2 0e+00 0.4341306 0.557 0.328 0.0000000 ENSG00000134294 IPD_Microglia_vs_HC_Microglia
PTGDS 0e+00 -0.6524033 0.232 0.398 0.0000000 ENSG00000107317 IPD_Microglia_vs_HC_Microglia
ANK2 0e+00 -0.5692483 0.326 0.489 0.0000000 ENSG00000145362 IPD_Microglia_vs_HC_Microglia
NEAT1 0e+00 0.8088482 0.985 0.953 0.0000000 ENSG00000245532 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
CDKL1 0e+00 0.8000008 0.690 0.324 0.0000000 ENSG00000100490 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
HSPA1A 0e+00 0.7970088 0.603 0.312 0.0000000 ENSG00000204389 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
MSRA 0e+00 0.7804543 0.574 0.287 0.0000000 ENSG00000175806 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
LUCAT1 0e+00 0.7771406 0.402 0.172 0.0000000 ENSG00000248323 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
SLC35F3 0e+00 0.6939169 0.472 0.206 0.0000000 ENSG00000183780 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
SLC38A2 0e+00 0.6693157 0.747 0.449 0.0000000 ENSG00000134294 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
OGFRL1 0e+00 0.6056463 0.267 0.090 0.0000000 ENSG00000119900 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
S100A6 0e+00 0.5979215 0.317 0.085 0.0000000 ENSG00000197956 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
SLC39A11 0e+00 0.5349573 0.943 0.832 0.0000000 ENSG00000133195 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
GRID2 0e+00 0.5324880 0.629 0.301 0.0000000 ENSG00000152208 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
SELENOP 0e+00 0.5289287 0.836 0.743 0.0000000 ENSG00000250722 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
ANKRD18A 0e+00 0.5286409 0.653 0.434 0.0000000 ENSG00000180071 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
JAKMIP3 0e+00 0.5111564 0.726 0.533 0.0000000 ENSG00000188385 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
FMO5 0e+00 0.5056828 0.371 0.166 0.0000000 ENSG00000131781 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
SEM1 0e+00 0.4968863 0.644 0.419 0.0000000 ENSG00000127922 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
HSP90AA1 0e+00 0.4951208 0.950 0.899 0.0000000 ENSG00000080824 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
USP31 0e+00 0.4859553 0.743 0.540 0.0000000 ENSG00000103404 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
TMPRSS5 0e+00 0.4818362 0.360 0.140 0.0000000 ENSG00000166682 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
FBXO32 0e+00 0.4788437 0.671 0.456 0.0000000 ENSG00000156804 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
CHORDC1 0e+00 0.4729798 0.514 0.236 0.0000000 ENSG00000110172 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
TPD52L1 0e+00 0.4675854 0.191 0.012 0.0000000 ENSG00000111907 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
HSPH1 0e+00 0.4633020 0.431 0.183 0.0000000 ENSG00000120694 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
FANCC 0e+00 0.4617200 0.459 0.219 0.0000000 ENSG00000158169 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
FKBP5 0e+00 0.4598565 0.822 0.656 0.0000000 ENSG00000096060 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
ERBIN 0e+00 0.4459280 0.985 0.970 0.0000000 ENSG00000112851 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
HSPA1B 0e+00 0.4374756 0.338 0.107 0.0000000 ENSG00000204388 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
CPQ 0e+00 0.4342444 0.856 0.711 0.0000000 ENSG00000104324 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
PDE4DIP 0e+00 0.4266398 0.789 0.635 0.0000000 ENSG00000178104 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
QDPR 0e+00 0.4217043 0.949 0.877 0.0000000 ENSG00000151552 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
PTGES3 0e+00 0.4215813 0.645 0.439 0.0000000 ENSG00000110958 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
SEPTIN8 0e+00 0.3997771 0.775 0.621 0.0000000 ENSG00000164402 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
FBXO2 0e+00 0.3931558 0.306 0.079 0.0000000 ENSG00000116661 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
MAP2K4 0e+00 0.3839903 0.761 0.582 0.0000000 ENSG00000065559 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
MRPS6 0e+00 0.3179451 0.211 0.029 0.0000000 ENSG00000243927 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
SLC44A1 0e+00 0.2767122 0.994 0.992 0.0000000 ENSG00000070214 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
CTNNA3 0e+00 -0.2541831 0.983 0.991 0.0000000 ENSG00000183230 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
TRIM2 0e+00 -0.3134105 0.948 0.970 0.0000000 ENSG00000109654 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
PTK2 0e+00 -0.3296688 0.931 0.961 0.0000000 ENSG00000169398 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
LUC7L2 0e+00 -0.3739440 0.609 0.744 0.0000000 ENSG00000146963 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
AATK 0e+00 -0.3884724 0.817 0.902 0.0000000 ENSG00000181409 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
SIK3 0e+00 -0.4061477 0.974 0.981 0.0000000 ENSG00000160584 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
ANK2 0e+00 -0.4381293 0.935 0.967 0.0000000 ENSG00000145362 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
ZFYVE16 0e+00 -0.4382659 0.772 0.889 0.0000000 ENSG00000039319 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
PLEKHH1 0e+00 -0.4516099 0.892 0.957 0.0000000 ENSG00000054690 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
KCNH8 0e+00 -0.4794693 0.780 0.886 0.0000000 ENSG00000183960 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
PPP2R2B 0e+00 -0.4893031 0.956 0.983 0.0000000 ENSG00000156475 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
CARNS1 0e+00 -0.5170598 0.626 0.821 0.0000000 ENSG00000172508 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
LINC01949 0e+00 -0.5237451 0.121 0.358 0.0000000 ENSG00000233828 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
CIRBP 0e+00 -0.5557987 0.420 0.699 0.0000000 ENSG00000099622 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
PALM2-AKAP2 0e+00 -0.5633929 0.616 0.783 0.0000000 ENSG00000157654 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
POLR2F 0e+00 -0.5739424 0.708 0.886 0.0000000 ENSG00000100142 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
AC006059.1 0e+00 -0.5745828 0.352 0.525 0.0000000 ENSG00000230084 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
MOBP 0e+00 -0.5780183 0.907 0.968 0.0000000 ENSG00000168314 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
NAALADL2 0e+00 -0.5888721 0.546 0.771 0.0000000 ENSG00000177694 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
TP53TG5 0e+00 -0.6070992 0.364 0.650 0.0000000 ENSG00000124251 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
AL359091.1 0e+00 -0.7824083 0.521 0.759 0.0000000 ENSG00000207955 IPD_Oligodendrocytes_vs_HC_Oligodendrocytes
C1QL1 0e+00 -0.5296440 0.276 0.566 0.0000000 ENSG00000131094 IPD_OPC_vs_HC_OPC
SEM1 0e+00 0.5126273 0.652 0.407 0.0000000 ENSG00000127922 IPD_OPC_vs_HC_OPC
XYLT1 0e+00 -0.4316300 0.920 0.957 0.0000000 ENSG00000103489 IPD_OPC_vs_HC_OPC
EPN2 0e+00 -0.3863809 0.936 0.970 0.0000000 ENSG00000072134 IPD_OPC_vs_HC_OPC
HSP90AA1 0e+00 0.4079712 0.771 0.623 0.0000000 ENSG00000080824 IPD_OPC_vs_HC_OPC
ZNF365 0e+00 0.4442608 0.572 0.359 0.0000000 ENSG00000138311 IPD_OPC_vs_HC_OPC
TPT1 0e+00 0.4680811 0.487 0.297 0.0000000 ENSG00000133112 IPD_OPC_vs_HC_OPC
ANKRD10 0e+00 0.4842279 0.636 0.526 0.0000000 ENSG00000088448 IPD_OPC_vs_HC_OPC
AL592183.1 0e+00 0.3508305 0.484 0.267 0.0000000 ENSG00000273748 IPD_OPC_vs_HC_OPC
RPL37A 0e+00 0.4415866 0.576 0.415 0.0000000 ENSG00000197756 IPD_OPC_vs_HC_OPC
HSPA1A 0e+00 0.9833418 0.620 0.275 0.0000000 ENSG00000204389 IPD_Pericytes_vs_HC_Pericytes
TAGLN 0e+00 -1.3808185 0.329 0.646 0.0000000 ENSG00000149591 IPD_Pericytes_vs_HC_Pericytes
SLC38A2 0e+00 0.6654154 0.857 0.709 0.0000000 ENSG00000134294 IPD_Pericytes_vs_HC_Pericytes
ACTB 0e+00 -0.5957797 0.869 0.940 0.0000000 ENSG00000075624 IPD_Pericytes_vs_HC_Pericytes
ADAMTS9-AS2 0e+00 0.5866795 0.901 0.679 0.0000000 ENSG00000241684 IPD_Pericytes_vs_HC_Pericytes
HSP90AA1 0e+00 0.5152927 0.889 0.842 0.0000000 ENSG00000080824 IPD_Pericytes_vs_HC_Pericytes
AKR1C1 0e+00 0.5646259 0.341 0.087 0.0000000 ENSG00000187134 IPD_Pericytes_vs_HC_Pericytes
LINC00882 0e+00 0.7375457 0.577 0.301 0.0000000 ENSG00000242759 IPD_Pericytes_vs_HC_Pericytes
RANBP3L 0e+00 0.6746139 0.454 0.177 0.0000000 ENSG00000164188 IPD_Pericytes_vs_HC_Pericytes
RIPOR3 0e+00 0.4905358 0.808 0.596 0.0000000 ENSG00000042062 IPD_Pericytes_vs_HC_Pericytes
# Searchable table
DEG %>% 
  filter(p_val_adj <= 0.05 & avg_logFC >= 0.25 | 
               p_val_adj <= 0.05 & avg_logFC <= -0.25) %>% 
  select(Comparison,
         Gene_Symbol, 
         Gene_Emsembl, 
         pct.1,
         pct.2,
         p_val,
         p_val_adj,
         avg_logFC) %>% 
  mutate_if(is.numeric, ~round(.,4)) %>% 
  DT::datatable()

Seurat Differential_Expression_Senescence_Pathways

Senescence Pathways

# Read Marker files
DEG_sig <- DEG %>% filter(p_val_adj <= 0.05 & avg_logFC >= 0.25 | 
                                p_val_adj <= 0.05 & avg_logFC <= -0.25)

# Senescence_Pathways
comparison_names = unique(DEG$Comparison)
pathways_names = list.files("../Senescence/human/")

senescence_enrichment <- mclapply(comparison_names, function(i) { 
  mclapply(pathways_names, function(p) { 
    pathway <- read_delim(paste0("../Senescence/human/",p), 
                          "\t", escape_double = FALSE, trim_ws = TRUE)
    genes <- DEG %>% filter(Comparison == i) %>% nrow()
    sig_genes <- DEG_sig %>% filter(Comparison == i) %>% nrow()
    pathway_back_genes <- DEG %>% filter(Comparison == i) %>% 
                          filter(Gene_Symbol %in% pathway$Gene_Symbol) %>% nrow()
    pathway_sig_genes <- DEG_sig %>% filter(Comparison == i) %>% 
                         filter(Gene_Symbol %in% pathway$Gene_Symbol) %>% nrow()
    enrichment <- data.frame(rbind(Background=c(genes-sig_genes, sig_genes), 
                  Foreground= c(pathway_back_genes-pathway_sig_genes, pathway_sig_genes)))
    names(enrichment) <- c("Nonsignificant","Significant") 
    data.frame("Comparison" = i,
               "Genes" = genes,
               "Sig_Genes" = sig_genes,
               "Pathway" = p, 
               "Pathway_Genes" = nrow(pathway), 
               "Pathway_Back_Genes" = pathway_back_genes,
               "Pathway_Sig_Genes" = pathway_sig_genes,
               "Fisher_Pvalue" = fisher.test(enrichment, alternative="greater")$p.value,
               "Fisher_OddsRatio" = fisher.test(enrichment, alternative="greater")$estimate)
  })
}) %>% bind_rows()

# save the results
write_delim(senescence_enrichment, 
  "PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_Senescence_Pathways.txt", 
  delim = "\t")
# Read Senescence_Pathways
senescence_enrichment <- 
  read_delim("PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_Senescence_Pathways.txt", 
                            "\t", escape_double = FALSE, trim_ws = TRUE)

# Searchable table
senescence_enrichment %>% 
  arrange(Fisher_Pvalue) %>% 
  mutate(Fisher_Pvalue = round(Fisher_Pvalue, 4), 
         Fisher_OddsRatio = round(Fisher_OddsRatio, 2)) %>% 
  DT::datatable()

Seurat Differential_Expression_Lysosome_Pathways

Lysosome Pathways

# Read Marker files
DEG_sig <- DEG %>% filter(p_val_adj <= 0.05 & avg_logFC >= 0.25 | 
                                p_val_adj <= 0.05 & avg_logFC <= -0.25)

# Lysosome_Pathways
comparison_names <- unique(DEG$Comparison)
pathways_names <- list.files("lysosomal_pathways/")

lysosome_enrichment <- mclapply(comparison_names, function(i) { 
  mclapply(pathways_names, function(p) { 
    pathway <- read_delim(paste0("lysosomal_pathways/",p), 
                          "\t", escape_double = FALSE, trim_ws = TRUE)
    genes <- DEG %>% filter(Comparison == i) %>% nrow()
    sig_genes <- DEG_sig %>% filter(Comparison == i) %>% nrow()
    pathway_back_genes <- DEG %>% filter(Comparison == i) %>% 
                          filter(Gene_Symbol %in% pathway$Gene_Symbol) %>% nrow()
    pathway_sig_genes <- DEG_sig %>% filter(Comparison == i) %>% 
                         filter(Gene_Symbol %in% pathway$Gene_Symbol) %>% nrow()
    enrichment <- data.frame(rbind(Background=c(genes-sig_genes, sig_genes), 
                  Foreground= c(pathway_back_genes-pathway_sig_genes, pathway_sig_genes)))
    names(enrichment) <- c("Nonsignificant","Significant") 
    data.frame("Comparison" = i,
               "Genes" = genes,
               "Sig_Genes" = sig_genes,
               "Pathway" = p, 
               "Pathway_Genes" = nrow(pathway), 
               "Pathway_Back_Genes" = pathway_back_genes,
               "Pathway_Sig_Genes" = pathway_sig_genes,
               "Fisher_Pvalue" = fisher.test(enrichment, alternative="greater")$p.value,
               "Fisher_OddsRatio" = fisher.test(enrichment, alternative="greater")$estimate)
  })
}) %>% bind_rows()

# save the results
write_delim(lysosome_enrichment, 
  "PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_Lysosome_Pathways.txt", 
  delim = "\t")
# Read Senescence_Pathways
lysosome_enrichment <- 
  read_delim("PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_Lysosome_Pathways.txt", 
                            "\t", escape_double = FALSE, trim_ws = TRUE)

# Searchable table
lysosome_enrichment %>% 
  arrange(Fisher_Pvalue) %>% 
  mutate(Fisher_Pvalue = round(Fisher_Pvalue, 4), 
         Fisher_OddsRatio = round(Fisher_OddsRatio, 2)) %>% 
  DT::datatable()

Seurat Differential_Expression_GSEA

#---------- Differential_Expression_GSEA
# EDIT GMT FILE cut -f 2 -d$'\t' --complement
gmt_file <- gmtPathways("Human_GOBP_AllPathways_no_GO_iea_January_13_2021_symbol_fgsea.gmt")

comparison_names <- unique(DEG$Comparison)

fgsea_enrichment <- mclapply(comparison_names, function(i) {
  rnk_file <- DEG %>% 
    dplyr::filter(Comparison == i) %>% 
    dplyr::select(Comparison, 
           Gene_Symbol, 
           p_val, 
           avg_logFC) %>% 
    dplyr::mutate(sign_log10Pvalue = sign(avg_logFC) * -log10(p_val)) %>% 
    dplyr::arrange(sign_log10Pvalue)
  rnk_file <- setNames(rnk_file$sign_log10Pvalue, rnk_file$Gene_Symbol)
  fgsea_rnk <- fgsea(gmt_file, rnk_file, nperm=1000, minSize=10, maxSize=500)
  fgsea_rnk %>% 
    dplyr::mutate(Comparison = i) %>% 
    dplyr::select(-leadingEdge)
}) %>% bind_rows()

# save the results
write_delim(fgsea_enrichment, 
  "PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_fgsea_enrichment.txt", 
  delim = "\t")
# Read fgsea_enrichment
fgsea_enrichment <- 
  read_delim("PD_midbrain_snRNAseq_GSE157783_IPDvsHC_DEG_MAST_fgsea_enrichment.txt", 
                            "\t", escape_double = FALSE, trim_ws = TRUE)

# Number of Pathways
fgsea_enrichment %>% 
  dplyr::filter(padj <= 0.05) %>% 
  dplyr::count(Comparison) %>%
  dplyr::rename(Pathways_FDR0.05 = n) %>% 
  kable(caption="Number of Pathways", 
        format.args = list(big.mark = ",")) %>% 
  kable_styling(c("striped", "bordered"))
Number of Pathways
Comparison Pathways_FDR0.05
# Searchable table
fgsea_enrichment %>% 
  dplyr::filter(padj <= 0.05) %>% 
  dplyr::arrange(padj) %>% 
  dplyr::select(Comparison, 
         pathway, 
         pval, 
         padj, 
         ES, 
         NES) %>% 
  mutate_if(is.numeric, ~round(.,4)) %>% 
  DT::datatable()

Seurat Senescence_Markers

Cdkn2a used as a marker for senescence

# Cell clusters
Idents(seurat_anno) <- seurat_anno@meta.data$cluster_annotation

# UMAP cell clusters
DimPlot(seurat_anno, 
        reduction="umap", 
        label = TRUE, 
        label.size = 3, 
        repel = TRUE, 
        split.by = "group")

# Cellular Senescence Markers p16-Ink4a
FeaturePlot(seurat_anno, 
            reduction = "umap", 
            features = c("CDKN2A"), 
            split.by = "group", 
            label = FALSE)

Idents(seurat_anno, WhichCells(object = seurat_anno, 
                               expression = CDKN2A > 1, 
                               slot = 'data')) <- 'CDKN2A_pos'
Idents(seurat_anno, WhichCells(object = seurat_anno, 
                               expression = CDKN2A <= 1, 
                               slot = 'data')) <- 'CDKN2A_neg'

seurat_anno@meta.data$CDKN2A <- Idents(seurat_anno) 
cell_types <- unique(seurat_anno$cluster_annotation)

CDKN2A <- lapply(cell_types, function(i) { 
  IPD_neg <- seurat_anno@meta.data %>% 
             filter(cluster_annotation == i & group == "IPD" & 
             CDKN2A == "CDKN2A_neg") %>% nrow()
  IPD_pos <- seurat_anno@meta.data %>% 
             filter(cluster_annotation == i & group == "IPD" & 
             CDKN2A == "CDKN2A_pos") %>% nrow()
  HC_neg <- seurat_anno@meta.data %>% 
               filter(cluster_annotation == i & group == "HC" & 
               CDKN2A == "CDKN2A_neg") %>% nrow() 
  HC_pos <- seurat_anno@meta.data %>% 
               filter(cluster_annotation == i & group == "HC" & 
               CDKN2A == "CDKN2A_pos") %>% nrow()
  data.frame("Comparison" = i,
             "IPD_CDKN2A_neg" = IPD_neg,
             "IPD_CDKN2A_pos" = IPD_pos,
             "HC_CDKN2A_neg" = HC_neg, 
             "HC_CDKN2A_pos" = HC_pos, 
             "IPD_CDKN2A_Percentage" = IPD_pos/(IPD_neg+IPD_pos)*100,
             "HC_CDKN2A_Percentage" = HC_pos/(HC_neg+HC_pos)*100)
}) %>% bind_rows()

CDKN2A %>% 
  mutate(IPD_CDKN2A_Percentage = round(IPD_CDKN2A_Percentage, 4), 
         HC_CDKN2A_Percentage = round(HC_CDKN2A_Percentage, 4)) %>% 
  DT::datatable()
# Boxplot of CDKN2A
df <- cbind(seurat_anno@meta.data %>% 
  dplyr::select(cluster_annotation, CDKN2A, group, sample),
  data = seurat_anno@assays$RNA@data["CDKN2A",])

ggboxplot(df, x = "group", y = "data",
                color = "group", palette =c("#00AFBB", "#E7B800"),
                add = "jitter", shape = "group", size = 0.2, 
          title = "CDKN2A normalized counts", facet.by = "cluster_annotation")

#---------- Senescence Markers p21 Wafl/Clp1/Sdi1
FeaturePlot(seurat_anno, 
            reduction = "umap", 
            features = c("CDKN1A"), 
            split.by = "group", 
            label = FALSE)

Idents(seurat_anno, WhichCells(object = seurat_anno, 
                               expression = CDKN1A > 1, 
                               slot = 'data')) <- 'CDKN1A_pos'
Idents(seurat_anno, WhichCells(object = seurat_anno, 
                               expression = CDKN1A <= 1, 
                               slot = 'data')) <- 'CDKN1A_neg'

seurat_anno@meta.data$CDKN1A <- Idents(seurat_anno) 
cell_types <- unique(seurat_anno$cluster_annotation)

CDKN1A <- lapply(cell_types, function(i) { 
  IPD_neg <- seurat_anno@meta.data %>% 
             filter(cluster_annotation == i & group == "IPD" & 
             CDKN1A == "CDKN1A_neg") %>% nrow()
  IPD_pos <- seurat_anno@meta.data %>% 
             filter(cluster_annotation == i & group == "IPD" & 
             CDKN1A == "CDKN1A_pos") %>% nrow()
  HC_neg <- seurat_anno@meta.data %>% 
               filter(cluster_annotation == i & group == "HC" & 
               CDKN1A == "CDKN1A_neg") %>% nrow() 
  HC_pos <- seurat_anno@meta.data %>% 
               filter(cluster_annotation == i & group == "HC" & 
               CDKN1A == "CDKN1A_pos") %>% nrow()
  data.frame("Comparison" = i,
             "IPD_CDKN1A_neg" = IPD_neg,
             "IPD_CDKN1A_pos" = IPD_pos,
             "HC_CDKN1A_neg" = HC_neg, 
             "HC_CDKN1A_pos" = HC_pos, 
             "IPD_CDKN1A_Percentage" = IPD_pos/(IPD_neg+IPD_pos)*100,
             "HC_CDKN1A_Percentage" = HC_pos/(HC_neg+HC_pos)*100)
}) %>% bind_rows()

CDKN1A %>% 
  mutate(IPD_CDKN1A_Percentage = round(IPD_CDKN1A_Percentage, 4), 
         HC_CDKN1A_Percentage = round(HC_CDKN1A_Percentage, 4)) %>% 
  DT::datatable()
# Boxplot of CDKN1A
df <- cbind(seurat_anno@meta.data %>% 
  dplyr::select(cluster_annotation, CDKN1A, group, sample),
  data = seurat_anno@assays$RNA@data["CDKN1A",])

ggboxplot(df, x = "group", y = "data",
                color = "group", palette =c("#00AFBB", "#E7B800"),
                add = "jitter", shape = "group", size = 0.2, 
          title = "CDKN1A normalized counts", facet.by = "cluster_annotation")

#---------- Senescence Markers p53 TP53
FeaturePlot(seurat_anno, 
            reduction = "umap", 
            features = c("TP53"), 
            split.by = "group", 
            label = FALSE)

Idents(seurat_anno, WhichCells(object = seurat_anno, 
                               expression = TP53 > 1, 
                               slot = 'data')) <- 'TP53_pos'
Idents(seurat_anno, WhichCells(object = seurat_anno, 
                               expression = TP53 <= 1, 
                               slot = 'data')) <- 'TP53_neg'

seurat_anno@meta.data$TP53 <- Idents(seurat_anno) 
cell_types <- unique(seurat_anno$cluster_annotation)

TP53 <- lapply(cell_types, function(i) { 
  IPD_neg <- seurat_anno@meta.data %>% 
             filter(cluster_annotation == i & group == "IPD" & 
             TP53 == "TP53_neg") %>% nrow()
  IPD_pos <- seurat_anno@meta.data %>% 
             filter(cluster_annotation == i & group == "IPD" & 
             TP53 == "TP53_pos") %>% nrow()
  HC_neg <- seurat_anno@meta.data %>% 
               filter(cluster_annotation == i & group == "HC" & 
               TP53 == "TP53_neg") %>% nrow() 
  HC_pos <- seurat_anno@meta.data %>% 
               filter(cluster_annotation == i & group == "HC" & 
               TP53 == "TP53_pos") %>% nrow()
  data.frame("Comparison" = i,
             "IPD_TP53_neg" = IPD_neg,
             "IPD_TP53_pos" = IPD_pos,
             "HC_TP53_neg" = HC_neg, 
             "HC_TP53_pos" = HC_pos, 
             "IPD_TP53_Percentage" = IPD_pos/(IPD_neg+IPD_pos)*100,
             "HC_TP53_Percentage" = HC_pos/(HC_neg+HC_pos)*100)
}) %>% bind_rows()

TP53 %>% 
  mutate(IPD_TP53_Percentage = round(IPD_TP53_Percentage, 4), 
         HC_TP53_Percentage = round(HC_TP53_Percentage, 4)) %>% 
  DT::datatable()
# Boxplot of TP53
df <- cbind(seurat_anno@meta.data %>% 
  dplyr::select(cluster_annotation, TP53, group, sample),
  data = seurat_anno@assays$RNA@data["TP53",])

ggboxplot(df, x = "group", y = "data",
                color = "group", palette =c("#00AFBB", "#E7B800"),
                add = "jitter", shape = "group", size = 0.2, 
          title = "TP53 normalized counts", facet.by = "cluster_annotation")

#---------- Senescence Markers Beta-galactosidase GLB1
FeaturePlot(seurat_anno, 
            reduction = "umap", 
            features = c("GLB1"), 
            split.by = "group", 
            label = FALSE)

Idents(seurat_anno, WhichCells(object = seurat_anno, 
                               expression = GLB1 > 1, 
                               slot = 'data')) <- 'GLB1_pos'
Idents(seurat_anno, WhichCells(object = seurat_anno, 
                               expression = GLB1 <= 1, 
                               slot = 'data')) <- 'GLB1_neg'

seurat_anno@meta.data$GLB1 <- Idents(seurat_anno) 
cell_types <- unique(seurat_anno$cluster_annotation)

GLB1 <- lapply(cell_types, function(i) { 
  IPD_neg <- seurat_anno@meta.data %>% 
             filter(cluster_annotation == i & group == "IPD" & 
             GLB1 == "GLB1_neg") %>% nrow()
  IPD_pos <- seurat_anno@meta.data %>% 
             filter(cluster_annotation == i & group == "IPD" & 
             GLB1 == "GLB1_pos") %>% nrow()
  HC_neg <- seurat_anno@meta.data %>% 
               filter(cluster_annotation == i & group == "HC" & 
               GLB1 == "GLB1_neg") %>% nrow() 
  HC_pos <- seurat_anno@meta.data %>% 
               filter(cluster_annotation == i & group == "HC" & 
               GLB1 == "GLB1_pos") %>% nrow()
  data.frame("Comparison" = i,
             "IPD_GLB1_neg" = IPD_neg,
             "IPD_GLB1_pos" = IPD_pos,
             "HC_GLB1_neg" = HC_neg, 
             "HC_GLB1_pos" = HC_pos, 
             "IPD_GLB1_Percentage" = IPD_pos/(IPD_neg+IPD_pos)*100,
             "HC_GLB1_Percentage" = HC_pos/(HC_neg+HC_pos)*100)
}) %>% bind_rows()

GLB1 %>% 
  mutate(IPD_GLB1_Percentage = round(IPD_GLB1_Percentage, 4), 
         HC_GLB1_Percentage = round(HC_GLB1_Percentage, 4)) %>% 
  DT::datatable()
# Boxplot of GLB1
df <- cbind(seurat_anno@meta.data %>% 
  dplyr::select(cluster_annotation, GLB1, group, sample),
  data = seurat_anno@assays$RNA@data["GLB1",])

ggboxplot(df, x = "group", y = "data",
                color = "group", palette =c("#00AFBB", "#E7B800"),
                add = "jitter", shape = "group", size = 0.2, 
          title = "GLB1 normalized counts", facet.by = "cluster_annotation")